diff --git a/Add-riscv64-support.patch b/Add-riscv64-support.patch index 59017ae02c5b6c185a441f428acd08dfc203eb6e..7e2d50c2e6427d152e7f6a395a5bcf81883b5a40 100644 --- a/Add-riscv64-support.patch +++ b/Add-riscv64-support.patch @@ -1,346 +1,204 @@ -From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 -From: Fei Yang -Date: Thu, 24 Mar 2022 09:22:46 +0000 -Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V - backend (cannot pass compilation) - ---- - make/autoconf/build-aux/config.guess | 2 +- - make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 8 +- - make/autoconf/platform.m4 | 6 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ - .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ - .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + - src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + - src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + - .../cpu/riscv/foreign_globals_riscv.cpp | 44 + - .../cpu/riscv/foreign_globals_riscv.hpp | 32 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + - .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + - .../cardTableBarrierSetAssembler_riscv.cpp | 111 + - .../cardTableBarrierSetAssembler_riscv.hpp | 42 + - .../modRefBarrierSetAssembler_riscv.cpp | 55 + - .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + 
- .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + - src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + - src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + - src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + - .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + - src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + - src/hotspot/cpu/riscv/register_riscv.cpp | 73 + - src/hotspot/cpu/riscv/register_riscv.hpp | 324 + - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + - src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 527 + - src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + - .../templateInterpreterGenerator_riscv.cpp | 1794 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ - src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - .../riscv/universalNativeInvoker_riscv.cpp | 33 + - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + - src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- - src/hotspot/os/linux/os_linux.cpp | 2 + - .../linux_riscv/assembler_linux_riscv.cpp | 26 + - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + - .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + - .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + - .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + - .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - 
.../linux_riscv/vm_version_linux_riscv.cpp | 118 + - src/hotspot/share/c1/c1_LIR.cpp | 112 +- - src/hotspot/share/c1/c1_LIR.hpp | 209 +- - src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- - .../gc/shenandoah/shenandoahArguments.cpp | 4 +- - src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- - .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/regmask.hpp | 2 +- - .../share/runtime/abstract_vm_version.cpp | 3 +- - src/hotspot/share/runtime/synchronizer.cpp | 2 +- - src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 4 +- - src/hotspot/share/utilities/macros.hpp | 26 + - .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 4 +- - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + - .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 13 +- - .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + - .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../proc/riscv64/ProcRISCV64Thread.java | 88 + - .../riscv64/ProcRISCV64ThreadContext.java | 48 + - .../riscv64/ProcRISCV64ThreadFactory.java | 46 + - .../remote/riscv64/RemoteRISCV64Thread.java | 55 + - .../riscv64/RemoteRISCV64ThreadContext.java | 48 + - .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../debugger/risv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 5 +- - .../LinuxRISCV64JavaThreadPDAccess.java | 134 + - .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + - .../riscv64/RISCV64JavaCallWrapper.java | 61 + - .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- - .../testcases/GenericTestCaseForOtherCPU.java | 11 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + - .../loopopts/superword/ProdRed_Double.java | 4 +- - .../loopopts/superword/ProdRed_Float.java | 4 +- - .../loopopts/superword/ProdRed_Int.java | 4 +- - .../loopopts/superword/ReductionPerf.java | 4 +- - .../superword/SumRedAbsNeg_Double.java | 4 +- - .../superword/SumRedAbsNeg_Float.java | 4 +- - .../loopopts/superword/SumRedSqrt_Double.java | 4 +- - .../loopopts/superword/SumRed_Double.java | 4 +- - .../loopopts/superword/SumRed_Float.java | 4 +- - .../loopopts/superword/SumRed_Int.java | 4 +- - .../sha/predicate/IntrinsicPredicates.java | 11 +- - .../NMT/CheckForProperDetailStackTrace.java | 4 +- - .../ReservedStack/ReservedStackTest.java | 4 +- - .../HeapMonitorEventsForTwoThreadsTest.java | 1 - - ...stMutuallyExclusivePlatformPredicates.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- - test/lib/jdk/test/lib/Platform.java | 4 + - 187 files changed, 59079 insertions(+), 189 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/c1_Defs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/riscv.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 
src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index a88a9adec3f..15111d827ab 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess +diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml +index 385b097b9f..4eebe79871 100644 +--- a/.github/workflows/build-cross-compile.yml ++++ b/.github/workflows/build-cross-compile.yml +@@ -54,28 +54,39 @@ jobs: + - arm + - s390x + - ppc64le ++ - riscv64 + include: + - target-cpu: aarch64 + gnu-arch: aarch64 + debian-arch: arm64 + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: arm + gnu-arch: arm + debian-arch: armhf + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ 
tolerate-sysroot-errors: false + gnu-abi: eabihf + - target-cpu: s390x + gnu-arch: s390x + debian-arch: s390x + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: ppc64le + gnu-arch: powerpc64le + debian-arch: ppc64el + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false ++ - target-cpu: riscv64 ++ gnu-arch: riscv64 ++ debian-arch: riscv64 ++ debian-repository: https://snapshot.debian.org/archive/debian/20240228T034848Z/ ++ debian-version: sid ++ tolerate-sysroot-errors: true + + steps: + - name: 'Checkout the JDK source' +@@ -113,6 +124,7 @@ jobs: + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Create sysroot' ++ id: create-sysroot + run: > + sudo debootstrap + --arch=${{ matrix.debian-arch }} +@@ -123,6 +135,7 @@ jobs: + ${{ matrix.debian-version }} + sysroot + ${{ matrix.debian-repository }} ++ continue-on-error: ${{ matrix.tolerate-sysroot-errors }} + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Prepare sysroot' +@@ -134,7 +147,12 @@ jobs: + rm -rf sysroot/usr/{sbin,bin,share} + rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd} + rm -rf sysroot/usr/libexec/gcc +- if: steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ ++ - name: 'Remove broken sysroot' ++ run: | ++ sudo rm -rf sysroot/ ++ if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Configure' + run: > +@@ -153,6 +171,7 @@ jobs: + echo "Dumping config.log:" && + cat config.log && + exit 1) ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' + + - name: 'Build' + id: build +@@ -160,3 +179,4 @@ jobs: + with: + make-target: 'hotspot ${{ inputs.make-arguments }}' + platform: linux-${{ matrix.target-cpu }} ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' +diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml +index 4186c451b7..678f5a038e 100644 +--- a/.github/workflows/build-macos.yml ++++ b/.github/workflows/build-macos.yml +@@ -55,7 +55,7 @@ on: + jobs: + build-macos: + name: build +- runs-on: macos-13 ++ runs-on: macos-12 + + strategy: + fail-fast: false +diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml +index 78a8e1e0d4..46cae3afbf 100644 +--- a/.github/workflows/main.yml ++++ b/.github/workflows/main.yml +@@ -223,7 +223,7 @@ jobs: + uses: ./.github/workflows/build-macos.yml + with: + platform: macos-x64 +- xcode-toolset-version: '14.3.1' ++ xcode-toolset-version: '13.4.1' + configure-arguments: ${{ github.event.inputs.configure-arguments }} + make-arguments: ${{ github.event.inputs.make-arguments }} + if: needs.select.outputs.macos-x64 == 'true' +@@ -234,7 +234,7 @@ jobs: + uses: ./.github/workflows/build-macos.yml + with: + platform: macos-aarch64 +- xcode-toolset-version: '14.3.1' ++ xcode-toolset-version: '13.4.1' + extra-conf-options: '--openjdk-target=aarch64-apple-darwin' + configure-arguments: ${{ github.event.inputs.configure-arguments }} + make-arguments: ${{ github.event.inputs.make-arguments }} +@@ -298,7 +298,7 @@ jobs: + with: + platform: macos-x64 + bootjdk-platform: macos-x64 +- runs-on: macos-13 ++ runs-on: macos-12 + + test-windows-x64: + name: windows-x64 +@@ -341,7 +341,7 @@ jobs: + -H 'Accept: 
application/vnd.github+json' \ + -H 'Authorization: Bearer ${{ github.token }}' \ + -H 'X-GitHub-Api-Version: 2022-11-28' \ +- '${{ github.api_url }}/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts')" ++ '${{ github.api_url }}/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts?per_page=100')" + BUNDLE_ARTIFACT_IDS="$(echo "$ALL_ARTIFACT_IDS" | jq -r -c '.artifacts | map(select(.name|startswith("bundles-"))) | .[].id')" + for id in $BUNDLE_ARTIFACT_IDS; do + echo "Removing $id" +diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml +index c3560f2135..dacf8eaba1 100644 +--- a/.github/workflows/test.yml ++++ b/.github/workflows/test.yml +@@ -127,7 +127,7 @@ jobs: + run: | + # On macOS we need to install some dependencies for testing + brew install make +- sudo xcode-select --switch /Applications/Xcode_14.3.1.app/Contents/Developer ++ sudo xcode-select --switch /Applications/Xcode_13.4.1.app/Contents/Developer + # This will make GNU make available as 'make' and not only as 'gmake' + echo '/usr/local/opt/make/libexec/gnubin' >> $GITHUB_PATH + if: runner.os == 'macOS' +diff --git a/.jcheck/conf b/.jcheck/conf +index 5636278120..d13b1bf5e8 100644 +--- a/.jcheck/conf ++++ b/.jcheck/conf +@@ -1,5 +1,5 @@ + [general] +-project=jdk-updates ++project=riscv-port + jbs=JDK + version=11.0.25 + +diff --git a/SECURITY.md b/SECURITY.md +new file mode 100644 +index 0000000000..f4c5e7e67c +--- /dev/null ++++ b/SECURITY.md +@@ -0,0 +1,3 @@ ++# JDK Vulnerabilities ++ ++Please follow the process outlined in the [OpenJDK Vulnerability Policy](https://openjdk.org/groups/vulnerability/report) to disclose vulnerabilities in the JDK. +diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub +index 3c280ac7c0..6c66c221e0 100644 +--- a/make/autoconf/build-aux/config.sub ++++ b/make/autoconf/build-aux/config.sub @@ -1,6 +1,6 @@ #!/bin/sh # --# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. +-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # + # This code is free software; you can redistribute it and/or modify it +@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then + exit + fi + ++# Canonicalize for riscv which autoconf-config.sub doesn't handle ++if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then ++ result=`echo $@ | sed 's/linux/unknown-linux/'` ++ echo $result ++ exit ++fi ++ + # First, filter out everything that doesn't begin with "aarch64-" + if ! echo $* | grep '^aarch64-' >/dev/null ; then + . 
$DIR/autoconf-config.sub "$@" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index 9bb34363e5c..f84e8f84c60 100644 +index 9bb34363e5..f84e8f84c6 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], @@ -353,32 +211,8 @@ index 9bb34363e5c..f84e8f84c60 100644 AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" -diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc6a..5c49fd9285d 100644 ---- a/make/autoconf/libraries.m4 -+++ b/make/autoconf/libraries.m4 -@@ -1,5 +1,5 @@ - # --# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - # - # This code is free software; you can redistribute it and/or modify it -@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" - fi - -+ # Because RISC-V only has word-sized atomics, it requries libatomic where -+ # other common architectures do not. So link libatomic by default. -+ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ fi -+ - # perfstat lib - if test "x$OPENJDK_TARGET_OS" = xaix; then - BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index 26a58eb2ee8..67972d89248 100644 +index 5d1d9efa39..565ca18e20 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -1,5 +1,5 @@ @@ -397,17 +231,21 @@ index 26a58eb2ee8..67972d89248 100644 # The cpu defines below are for zero, we don't support them directly. elif test "x$OPENJDK_$1_CPU" = xsparc; then -@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], - HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xs390x; then - HOTSPOT_$1_CPU_DEFINE=S390 -- elif test "x$OPENJDK_$1_CPU" = xriscv64; then -- HOTSPOT_$1_CPU_DEFINE=RISCV - elif test "x$OPENJDK_$1_CPU" = xloongarch64; then - HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 - elif test "x$OPENJDK_$1_CPU" != x; then +diff --git a/make/autoconf/version-numbers b/make/autoconf/version-numbers +index fe5e0d9850..c02b769bf2 100644 +--- a/make/autoconf/version-numbers ++++ b/make/autoconf/version-numbers +@@ -37,7 +37,7 @@ DEFAULT_VERSION_DATE=2024-10-15 + DEFAULT_VERSION_CLASSFILE_MAJOR=55 # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`" + DEFAULT_VERSION_CLASSFILE_MINOR=0 + DEFAULT_ACCEPTABLE_BOOT_VERSIONS="10 11" +-DEFAULT_PROMOTED_VERSION_PRE= ++DEFAULT_PROMOTED_VERSION_PRE=ea + + LAUNCHER_NAME=openjdk + PRODUCT_NAME=OpenJDK diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac5724b..67f4c6f0574 100644 +index c5a3ac5724..51137b99db 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -417,13 +255,12 @@ index c5a3ac5724b..67f4c6f0574 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif @@ -431,79 +268,97 @@ index c5a3ac5724b..67f4c6f0574 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3d7..63f193de86e 100644 ---- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * -@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); - - Assembler::Condition acond, ncond; - switch (condition) { -diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa18..cb095052534 100644 ---- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +diff --git a/make/hotspot/lib/JvmFlags.gmk b/make/hotspot/lib/JvmFlags.gmk +index 3246c83155..1a91eb0079 100644 +--- a/make/hotspot/lib/JvmFlags.gmk ++++ b/make/hotspot/lib/JvmFlags.gmk +@@ -67,10 +67,12 @@ JVM_CFLAGS_TARGET_DEFINES += \ + # + + ifeq ($(DEBUG_LEVEL), release) ++ # release builds disable uses of assert macro from . ++ JVM_CFLAGS_DEBUGLEVEL := -DNDEBUG + # For hotspot, release builds differ internally between "optimized" and "product" + # in that "optimize" does not define PRODUCT. + ifneq ($(HOTSPOT_DEBUG_LEVEL), optimized) +- JVM_CFLAGS_DEBUGLEVEL := -DPRODUCT ++ JVM_CFLAGS_DEBUGLEVEL += -DPRODUCT + endif + else ifeq ($(DEBUG_LEVEL), fastdebug) + JVM_CFLAGS_DEBUGLEVEL := -DASSERT +diff --git a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java +index f02537c305..ef94e3879c 100644 +--- a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java ++++ b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - +@@ -363,33 +363,35 @@ class TzdbZoneRulesProvider { + } --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); -+ - AsmCondition acond = al; - AsmCondition ncond = nv; - if (opr1 != opr2) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2f..d74db914331 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2012, 2019, SAP SE. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { - } - } + Month parseMonth(String mon) { +- switch (mon) { +- case "Jan": return Month.JANUARY; +- case "Feb": return Month.FEBRUARY; +- case "Mar": return Month.MARCH; +- case "Apr": return Month.APRIL; +- case "May": return Month.MAY; +- case "Jun": return Month.JUNE; +- case "Jul": return Month.JULY; +- case "Aug": return Month.AUGUST; +- case "Sep": return Month.SEPTEMBER; +- case "Oct": return Month.OCTOBER; +- case "Nov": return Month.NOVEMBER; +- case "Dec": return Month.DECEMBER; +- } ++ int len = mon.length(); ++ ++ if (mon.regionMatches(true, 0, "January", 0, len)) return Month.JANUARY; ++ if (mon.regionMatches(true, 0, "February", 0, len)) return Month.FEBRUARY; ++ if (mon.regionMatches(true, 0, "March", 0, len)) return Month.MARCH; ++ if (mon.regionMatches(true, 0, "April", 0, len)) return Month.APRIL; ++ if (mon.regionMatches(true, 0, "May", 0, len)) return Month.MAY; ++ if (mon.regionMatches(true, 0, "June", 0, len)) return Month.JUNE; ++ if (mon.regionMatches(true, 0, "July", 0, len)) return Month.JULY; ++ if (mon.regionMatches(true, 0, "August", 0, len)) return Month.AUGUST; ++ if (mon.regionMatches(true, 0, "September", 0, len)) return Month.SEPTEMBER; ++ if (mon.regionMatches(true, 0, "October", 0, len)) return Month.OCTOBER; ++ if (mon.regionMatches(true, 0, "November", 0, len)) return Month.NOVEMBER; ++ if (mon.regionMatches(true, 0, "December", 0, len)) return Month.DECEMBER; ++ + throw new IllegalArgumentException("Unknown month: " + mon); + } -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); + DayOfWeek parseDayOfWeek(String dow) { +- switch (dow) { +- case "Mon": return DayOfWeek.MONDAY; +- case "Tue": return DayOfWeek.TUESDAY; 
+- case "Wed": return DayOfWeek.WEDNESDAY; +- case "Thu": return DayOfWeek.THURSDAY; +- case "Fri": return DayOfWeek.FRIDAY; +- case "Sat": return DayOfWeek.SATURDAY; +- case "Sun": return DayOfWeek.SUNDAY; +- } ++ int len = dow.length(); ++ ++ if (dow.regionMatches(true, 0, "Monday", 0, len)) return DayOfWeek.MONDAY; ++ if (dow.regionMatches(true, 0, "Tuesday", 0, len)) return DayOfWeek.TUESDAY; ++ if (dow.regionMatches(true, 0, "Wednesday", 0, len)) return DayOfWeek.WEDNESDAY; ++ if (dow.regionMatches(true, 0, "Thursday", 0, len)) return DayOfWeek.THURSDAY; ++ if (dow.regionMatches(true, 0, "Friday", 0, len)) return DayOfWeek.FRIDAY; ++ if (dow.regionMatches(true, 0, "Saturday", 0, len)) return DayOfWeek.SATURDAY; ++ if (dow.regionMatches(true, 0, "Sunday", 0, len)) return DayOfWeek.SUNDAY; ++ + throw new IllegalArgumentException("Unknown day-of-week: " + dow); + } --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { - if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { - load_to_reg(this, opr1, result); // Condition doesn't matter. - return; diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 00000000000..31c63abe71d +index 0000000000..31c63abe71 --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp @@ -0,0 +1,177 @@ @@ -686,10 +541,10 @@ index 00000000000..31c63abe71d +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 00000000000..f15ef5304c5 +index 0000000000..67c6f1eccb --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,372 @@ +@@ -0,0 +1,337 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -742,7 +597,7 @@ index 00000000000..f15ef5304c5 + } +} + -+void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -762,7 +617,7 @@ index 00000000000..f15ef5304c5 + } +} + -+void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -808,33 +663,6 @@ index 00000000000..f15ef5304c5 + } +} + -+void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or -+ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. -+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); -+ -+ // Load the rest 32 bits. 
-+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); -+} -+ +void Assembler::li32(Register Rd, int32_t imm) { + // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit + int64_t upper = imm, lower = imm; @@ -843,7 +671,6 @@ index 00000000000..f15ef5304c5 + upper = (int32_t)upper; + // lui Rd, imm[31:12] + imm[11] + lui(Rd, upper); -+ // use addiw to distinguish li32 to li64 + addiw(Rd, Rd, lower); +} + @@ -907,7 +734,7 @@ index 00000000000..f15ef5304c5 + void Assembler::NAME(const Address &adr, Register temp) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ -+ code_section()->relocate(pc(), adr.rspec()); \ ++ relocate(adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ @@ -965,7 +792,7 @@ index 00000000000..f15ef5304c5 +} + +void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { -+ uintptr_t imm64 = (uintptr_t)addr; ++ int64_t imm64 = (int64_t)addr; +#ifndef PRODUCT + { + char buffer[64]; @@ -973,10 +800,10 @@ index 00000000000..f15ef5304c5 + block_comment(buffer); + } +#endif -+ assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "48-bit overflow in address constant"); -+ // Load upper 32 bits -+ int32_t imm = imm64 >> 16; ++ assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (int64_t)-1), ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int64_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -984,13 +811,13 @@ index 00000000000..f15ef5304c5 + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -+ slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. -+ offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1003,13 +830,6 @@ index 00000000000..f15ef5304c5 + addi(Rd, Rd, offset); +} + -+void Assembler::ifence() { -+ fence_i(); -+ if (UseConservativeFence) { -+ fence(ir, ir); -+ } -+} -+ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1064,10 +884,10 @@ index 00000000000..f15ef5304c5 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 00000000000..4923962a496 +index 0000000000..9f6c477afa --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,3047 @@ +@@ -0,0 +1,3056 @@ +/* + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
@@ -1253,13 +1073,22 @@ index 00000000000..4923962a496 + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ -+ template::value)> -+ Address(Register r, T o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -+ ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) -+ : Address(r, in_bytes(disp)) {} ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), @@ -1338,14 +1167,6 @@ index 00000000000..4923962a496 + + enum { instruction_size = 4 }; + -+ //---< calculate length of instruction >--- -+ // We just use the values set above. -+ // instruction must start at passed address -+ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -+ -+ //---< longest instructions >--- -+ static unsigned int instr_maxlen() { return instruction_size; } -+ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1383,11 +1204,9 @@ index 00000000000..4923962a496 + + void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); -+ void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); -+ void ifence(); + void j(const address &dest, Register temp = t0); + void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); @@ -1508,10 +1327,9 @@ index 00000000000..4923962a496 + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ -+ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); ++ relocate(InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ @@ -1556,7 +1374,7 @@ index 00000000000..4923962a496 + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ -+ code_section()->relocate(pc(), adr.rspec()); \ ++ relocate(adr.rspec()); \ + NAME(Rd, adr.target()); \ + break; \ + } \ @@ -1630,7 +1448,7 @@ index 00000000000..4923962a496 + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ -+ code_section()->relocate(pc(), adr.rspec()); \ ++ relocate(adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ + break; \ + } \ @@ -1773,7 +1591,7 @@ index 
00000000000..4923962a496 + switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ -+ code_section()->relocate(pc(), adr.rspec()); \ ++ relocate(adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ @@ -1816,7 +1634,7 @@ index 00000000000..4923962a496 + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ + switch (adr.getMode()) { \ + case Address::literal: { \ -+ code_section()->relocate(pc(), adr.rspec()); \ ++ relocate(adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ + break; \ + } \ @@ -1966,7 +1784,6 @@ index 00000000000..4923962a496 + emit(insn); \ + } + -+ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); + INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); + @@ -3014,6 +2831,7 @@ index 00000000000..4923962a496 + +// ==================================== +// RISC-V Bit-Manipulation Extension ++// Currently only support Zba, Zbb and Zbs bitmanip extensions. +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3088,6 +2906,7 @@ index 00000000000..4923962a496 + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ INSN(bexti, 0b0010011, 0b101, 0b010010); + +#undef INSN + @@ -4084,11 +3903,12 @@ index 00000000000..4923962a496 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // calculate pseudoinstruction ++ // Computational pseudo instructions + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); ++ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); ++ + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word @@ -4097,6 +3917,13 @@ index 00000000000..4923962a496 + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + @@ -4114,10 +3941,12 @@ index 00000000000..4923962a496 + virtual ~Assembler() {} +}; + ++class BiasedLockingCounters; ++ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 00000000000..7ffe8803985 +index 0000000000..7ffe880398 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ @@ -4170,7 +3999,7 @@ index 00000000000..7ffe8803985 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 00000000000..23d982f9abd +index 0000000000..485a5f9355 --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp @@ -0,0 +1,167 @@ @@ -4203,8 +4032,6 @@ index 00000000000..23d982f9abd +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allStatic.hpp" -+ +class Bytes: AllStatic { + 
public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering @@ -4251,6 +4078,7 @@ index 00000000000..23d982f9abd + ((u8)(((u4*)p)[0])); + + case 2: ++ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -4309,6 +4137,7 @@ index 00000000000..23d982f9abd + break; + + case 2: ++ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -4343,10 +4172,10 @@ index 00000000000..23d982f9abd +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 00000000000..dcd0472c540 +index 0000000000..9729e16c96 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,353 @@ +@@ -0,0 +1,339 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -4388,20 +4217,6 @@ index 00000000000..dcd0472c540 + +#define __ ce->masm()-> + -+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -+ __ bind(_entry); -+ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -+ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ -+ __ far_jump(RuntimeAddress(stub)); -+} -+ +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); @@ -4421,7 +4236,7 @@ index 00000000000..dcd0472c540 +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} @@ -4446,7 +4261,7 @@ index 00000000000..dcd0472c540 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -4653,7 +4468,7 @@ index 00000000000..dcd0472c540 + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; @@ -4692,7 +4507,7 @@ index 00000000000..dcd0472c540 +#ifndef PRODUCT + if (PrintC1Statistics) { + __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ add_memory_int32(Address(t1), 1); ++ __ incrementw(Address(t1)); + } +#endif + @@ -4702,7 +4517,7 @@ index 00000000000..dcd0472c540 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 00000000000..4417ad63091 +index 0000000000..4417ad6309 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -0,0 +1,84 @@ @@ -4792,7 +4607,7 @@ index 00000000000..4417ad63091 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git 
a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 00000000000..e3a2606c532 +index 0000000000..e3a2606c53 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp @@ -0,0 +1,30 @@ @@ -4828,7 +4643,7 @@ index 00000000000..e3a2606c532 +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 00000000000..7bc3d311501 +index 0000000000..7bc3d31150 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp @@ -0,0 +1,32 @@ @@ -4866,7 +4681,7 @@ index 00000000000..7bc3d311501 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 00000000000..172031941b2 +index 0000000000..682ebe8262 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp @@ -0,0 +1,388 @@ @@ -5028,8 +4843,8 @@ index 00000000000..172031941b2 +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +//-------------------------------------------------------- +// FrameMap @@ -5186,7 +5001,7 @@ index 00000000000..172031941b2 + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -5260,7 +5075,7 @@ index 00000000000..172031941b2 +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 00000000000..01281f5c9e1 +index 0000000000..01281f5c9e --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp @@ -0,0 +1,148 @@ @@ -5414,10 +5229,10 @@ index 00000000000..01281f5c9e1 +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 00000000000..4c1c13dc290 +index 0000000000..2a99d49c94 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,281 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -5610,7 +5425,7 @@ index 00000000000..4c1c13dc290 + code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); @@ -5628,7 +5443,7 @@ index 00000000000..4c1c13dc290 + } + break; + case lir_rem: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); @@ -5658,7 +5473,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -5671,7 +5488,9 @@ index 00000000000..4c1c13dc290 + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -5701,7 +5520,7 @@ index 00000000000..4c1c13dc290 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 00000000000..ab0a9963fc1 +index 0000000000..ab0a9963fc --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp @@ -0,0 +1,37 @@ @@ -5744,7 +5563,7 @@ index 00000000000..ab0a9963fc1 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 00000000000..b7f53e395f3 +index 0000000000..e6b95d3b7f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -0,0 +1,388 @@ @@ -5807,7 +5626,7 @@ index 00000000000..b7f53e395f3 + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); ++ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5845,14 +5664,14 @@ index 00000000000..b7f53e395f3 + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ li(t1, Klass::_lh_neutral_value); ++ __ mv(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ li(t1, Klass::_lh_neutral_value); ++ __ mv(t1, 
Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5914,7 +5733,7 @@ index 00000000000..b7f53e395f3 + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); + __ bind(failed); + } +#endif @@ -5923,7 +5742,7 @@ index 00000000000..b7f53e395f3 + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -6074,7 +5893,7 @@ index 00000000000..b7f53e395f3 + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); ++ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -6138,7 +5957,7 @@ index 00000000000..b7f53e395f3 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 00000000000..06a0f248ca6 +index 0000000000..06a0f248ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp @@ -0,0 +1,52 @@ @@ -6196,10 +6015,10 @@ index 00000000000..06a0f248ca6 +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 00000000000..742c2126e60 +index 0000000000..fb6a60fb49 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2267 @@ +@@ -0,0 +1,2258 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 
@@ -6243,7 +6062,6 @@ index 00000000000..742c2126e60 +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -6293,18 +6111,6 @@ index 00000000000..742c2126e60 + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + -+void LIR_Assembler::clinit_barrier(ciMethod* method) { -+ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -+ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -+ -+ Label L_skip_barrier; -+ -+ __ mov_metadata(t1, method->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+} -+ +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} @@ -6569,11 +6375,7 @@ index 00000000000..742c2126e60 + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ if (UseHeavyMonitors) { -+ __ j(*stub->entry()); -+ } else { -+ __ unlock_object(x15, x14, x10, *stub->entry()); -+ } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + @@ -6626,7 +6428,7 @@ index 00000000000..742c2126e60 + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -6636,18 +6438,20 @@ index 00000000000..742c2126e60 + __ reserved_stack_check(); + } + -+ code_stub->set_safepoint_offset(__ offset()); -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); +} + @@ -6878,7 +6682,7 @@ index 00000000000..742c2126e60 + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ -7000,7 +6804,7 @@ index 00000000000..742c2126e60 + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { 
++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -7045,7 +6849,14 @@ index 00000000000..742c2126e60 + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -7073,10 +6884,10 @@ index 00000000000..742c2126e60 + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ -+ if (!UseZGC) { -+ // Load barrier has not yet been applied, so ZGC can't verify the oop here -+ __ verify_oop(dest->as_register()); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); + } + } +} @@ -7119,11 +6930,13 @@ index 00000000000..742c2126e60 + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); +} @@ -7273,7 +7086,7 @@ index 00000000000..742c2126e60 + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ add_memory_int64(data_addr, DataLayout::counter_increment); ++ __ increment(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -7285,7 +7098,7 @@ index 00000000000..742c2126e60 + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ li(t1, DataLayout::counter_increment); ++ __ mv(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -7470,7 +7283,7 @@ index 00000000000..742c2126e60 + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -7565,7 +7378,7 @@ index 00000000000..742c2126e60 + // With RVC a call instruction may get 2-byte aligned. + // The address of the call instruction needs to be 4-byte aligned to + // ensure that it does not span a cache line so that it can be patched. 
-+ __ align(4); ++ __ align(NativeInstruction::instruction_size); +} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { @@ -7586,9 +7399,14 @@ index 00000000000..742c2126e60 + add_call_info(code_offset(), op->info()); +} + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); -+ assert((__ offset() % 4) == 0, "bad alignment"); ++ MacroAssembler::assert_alignment(call_pc); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -7711,12 +7529,16 @@ index 00000000000..742c2126e60 + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -7729,23 +7551,6 @@ index 00000000000..742c2126e60 + __ bind(*op->stub()->continuation()); +} + -+void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -+ Register obj = op->obj()->as_pointer_register(); -+ Register result = op->result_opr()->as_pointer_register(); -+ -+ CodeEmitInfo* info = op->info(); -+ if (info != NULL) { -+ add_debug_info_for_null_check_here(info); -+ } -+ -+ if (UseCompressedClassPointers) { -+ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ __ decode_klass_not_null(result); -+ } else { -+ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -+ } -+} -+ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -7779,7 +7584,7 @@ index 00000000000..742c2126e60 + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ add_memory_int64(data_addr, DataLayout::counter_increment); ++ __ increment(data_addr, DataLayout::counter_increment); + return; + } + } @@ -7795,7 +7600,7 @@ index 00000000000..742c2126e60 + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ add_memory_int64(data_addr, DataLayout::counter_increment); ++ __ increment(data_addr, DataLayout::counter_increment); + return; + } + } @@ -7805,13 +7610,13 @@ index 00000000000..742c2126e60 + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ add_memory_int64(counter_addr, DataLayout::counter_increment); ++ __ increment(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ add_memory_int64(counter_addr, DataLayout::counter_increment); ++ __ increment(counter_addr, DataLayout::counter_increment); + } +} + @@ -7846,7 +7651,7 @@ index 00000000000..742c2126e60 + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ li(t0, (u1)TypeEntries::null_seen); ++ __ mv(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7896,7 +7701,7 @@ index 00000000000..742c2126e60 + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ li(t1, (u1)TypeEntries::null_seen); ++ __ mv(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -8016,11 +7821,14 @@ index 00000000000..742c2126e60 + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+ if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + @@ -8063,7 +7871,7 @@ index 00000000000..742c2126e60 + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -8223,6 +8031,18 @@ index 00000000000..742c2126e60 + } +} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -8356,16 +8176,6 @@ index 00000000000..742c2126e60 + __ bind(done); +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} -+ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -8462,17 +8272,17 @@ index 00000000000..742c2126e60 + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < 
frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ li(t0, c); ++ __ mv(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 00000000000..051328c3a8a +index 0000000000..2afd61a3db --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,131 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -8533,8 +8343,6 @@ index 00000000000..051328c3a8a + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + -+ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); -+ + void casw(Register addr, Register newval, Register cmpval); + void caswu(Register addr, Register newval, Register cmpval); + void casl(Register addr, Register newval, Register cmpval); @@ -8548,6 +8356,7 @@ index 00000000000..051328c3a8a + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller @@ -8607,10 +8416,10 @@ index 00000000000..051328c3a8a +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 00000000000..e126f148cdf +index 0000000000..c41819fc2a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1075 @@ +@@ -0,0 +1,1094 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -8651,7 +8460,6 @@ index 00000000000..e126f148cdf +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -8819,6 +8627,7 @@ index 00000000000..e126f148cdf + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; +} @@ -8890,6 +8699,11 @@ index 00000000000..e126f148cdf + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8898,7 +8712,7 @@ index 00000000000..e126f148cdf + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + @@ -8968,7 +8782,12 @@ index 00000000000..e126f148cdf + right.load_item(); + + LIR_Opr reg = rlock(x); -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} @@ -8990,7 +8809,7 @@ index 00000000000..e126f148cdf + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant -+ if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); @@ -9001,7 +8820,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -9075,7 +8894,7 @@ index 00000000000..e126f148cdf + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; @@ -9254,16 +9073,14 @@ index 00000000000..e126f148cdf + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: // fall through -+ case vmIntrinsics::_dsqrt_strict: { ++ case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -9284,19 +9101,30 @@ index 00000000000..e126f148cdf +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr 
calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } @@ -9663,9 +9491,9 @@ index 00000000000..e126f148cdf + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -9688,7 +9516,7 @@ index 00000000000..e126f148cdf +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 00000000000..5f1c394ab3d +index 0000000000..0317ed9003 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ @@ -9721,22 +9549,22 @@ index 00000000000..5f1c394ab3d +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. 
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -+ (reg1 << LIR_Opr::reg2_shift) | -+ LIR_Opr::double_type | -+ LIR_Opr::fpu_register | -+ LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); +} + +#ifndef PRODUCT @@ -9749,7 +9577,7 @@ index 00000000000..5f1c394ab3d +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 00000000000..78a61128bdd +index 0000000000..78a61128bd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ @@ -9788,7 +9616,7 @@ index 00000000000..78a61128bdd +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 00000000000..d7ca7b0fd05 +index 0000000000..d7ca7b0fd0 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp @@ -0,0 +1,83 @@ @@ -9877,10 +9705,10 @@ index 00000000000..d7ca7b0fd05 +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 00000000000..6f656c8c533 +index 0000000000..957bfa1127 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,432 @@ +@@ -0,0 +1,444 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -9916,8 +9744,8 @@ index 00000000000..6f656c8c533 +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9933,7 +9761,7 @@ index 00000000000..6f656c8c533 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9945,19 +9773,17 @@ index 00000000000..6f656c8c533 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ null_check_offset = offset(); -+ -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(hdr, obj); -+ lwu(hdr, Address(hdr, Klass::access_flags_offset())); -+ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(t0, slow_case, true /* is_far */); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markWord::unlocked_value); ++ ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + 
sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. unlocked), and if so, store the @@ -9980,7 +9806,7 @@ index 00000000000..6f656c8c533 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ li(t0, aligned_mask - os::vm_page_size()); ++ mv(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9988,6 +9814,10 @@ index 00000000000..6f656c8c533 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ incrementw(Address(t1, 0)); ++ } + return null_check_offset; +} + @@ -9997,13 +9827,21 @@ index 00000000000..6f656c8c533 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -10030,8 +9868,13 @@ index 00000000000..6f656c8c533 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -10185,15 +10028,17 @@ index 00000000000..6f656c8c533 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); -+ -+ // Insert nmethod entry barrier into frame. 
-+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { @@ -10201,13 +10046,8 @@ index 00000000000..6f656c8c533 +} + + -+void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ -+ nop(); ++void C1_MacroAssembler::verified_entry() { ++ assert_alignment(pc()); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -10315,10 +10155,10 @@ index 00000000000..6f656c8c533 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 00000000000..dfd3c17d7c7 +index 0000000000..1950cee5dd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,121 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. @@ -10380,8 +10220,9 @@ index 00000000000..dfd3c17d7c7 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -10441,10 +10282,10 @@ index 00000000000..dfd3c17d7c7 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 00000000000..f523c9ed50a +index 0000000000..ffcca64e0b --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1172 @@ +@@ -0,0 +1,1210 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -10493,7 +10334,6 @@ index 00000000000..f523c9ed50a +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" -+#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + @@ -10614,19 +10454,14 @@ index 00000000000..f523c9ed50a + return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+enum return_state_t { -+ does_not_return, requires_return -+}; -+ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; -+ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -10644,9 +10479,8 @@ index 00000000000..f523c9ed50a + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; -+ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -10658,11 +10492,7 @@ index 00000000000..f523c9ed50a + + +StubFrame::~StubFrame() { -+ if (_return_state == requires_return) { -+ __ epilogue(); -+ } else { -+ __ should_not_reach_here(); -+ } ++ __ epilogue(); + _sasm = NULL; +} + @@ -10825,6 +10655,7 @@ index 00000000000..f523c9ed50a + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; +} + @@ -10872,7 +10703,9 @@ index 00000000000..f523c9ed50a + sasm->set_frame_size(frame_size); + break; + } -+ default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time @@ -10928,6 +10761,9 @@ index 00000000000..f523c9ed50a + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } @@ -11032,37 +10868,80 @@ index 00000000000..f523c9ed50a +#endif + __ reset_last_Java_frame(true); + ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler ++ ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. 
++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ +#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // Runtime will return true if the nmethod has been deoptimized, this is the -+ // expected scenario and anything else is an error. Note that we maintain a -+ // check on the result purely as a defensive measure. -+ Label no_deopt; -+ __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? + -+ // Perform a re-execute. The proper return address is already on the stack, -+ // we just need to restore registers, pop all of our frames but the return -+ // address and jump to the deopt blob. ++ // Will reexecute. 
Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(no_deopt); -+ __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; +} @@ -11088,13 +10967,13 @@ index 00000000000..f523c9ed50a + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -11373,14 +11252,14 @@ index 00000000000..f523c9ed50a + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -11414,7 +11293,7 @@ index 00000000000..f523c9ed50a + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ li(t0, 1); ++ __ mv(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -11474,7 +11353,7 @@ index 00000000000..f523c9ed50a + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -11493,7 +11372,7 @@ index 00000000000..f523c9ed50a + + case throw_range_check_failed_id: + { -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -11509,7 +11388,7 @@ index 00000000000..f523c9ed50a + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -11517,7 +11396,7 @@ index 00000000000..f523c9ed50a + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, 
CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -11525,7 +11404,7 @@ index 00000000000..f523c9ed50a + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -11533,7 +11412,7 @@ index 00000000000..f523c9ed50a + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -11556,14 +11435,14 @@ index 00000000000..f523c9ed50a + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -11572,7 +11451,7 @@ index 00000000000..f523c9ed50a + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -11595,7 +11474,7 @@ index 00000000000..f523c9ed50a + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); + + restore_live_registers(sasm); + } @@ -11603,8 +11482,8 @@ index 00000000000..f523c9ed50a + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); -+ __ li(x10, (int) id); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); ++ __ mv(x10, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -11619,10 +11498,10 @@ index 00000000000..f523c9ed50a +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 00000000000..fe46f7b21c8 +index 0000000000..9316d4be02 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -11657,8 +11536,10 @@ index 00000000000..fe46f7b21c8 +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + -+#ifndef COMPILER2 ++#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -11667,6 +11548,7 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -11677,25 +11559,28 @@ index 00000000000..fe46f7b21c8 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !COMPILER2 ++#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + ++define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 00000000000..27770dc17aa +index 0000000000..3da1f1c6d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -0,0 +1,1646 @@ ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,90 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11719,1635 +11604,1167 @@ index 00000000000..27770dc17aa + * + */ + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "opto/c2_MacroAssembler.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" -+#include "runtime/stubRoutines.hpp" -+ -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#define STOP(error) stop(error) -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#define STOP(error) block_comment(error); stop(error) -+#endif ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
+ -+ BLOCK_COMMENT("string_indexof_char_short {"); ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(intx, CompileThreshold, 10000); + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); + -+ mv(result, -1); -+ mv(index, zr); ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? 
lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +new file mode 100644 +index 0000000000..cdbd69807b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++// processor dependent initialization for riscv + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++extern void reg_mask_init(); + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +new file mode 100644 +index 0000000000..14a68b4502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(MATCH7); -+ addi(index, index, 7); ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++private: ++ void pd_initialize() {} + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++public: ++ void flush_bundle(bool start_new_bundle) {} + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +new file mode 100644 +index 0000000000..a4de342a93 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ addi(t0, cnt1, isL ? 
-32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++// ---------------------------------------------------------------------------- + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. + } + -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? mask7f7f : mask7fff); -+ -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed + } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); ++ __ emit_static_call_stub(); + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; +} ++#undef __ + -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++int CompiledStaticCall::to_interp_stub_size() { ++ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return 12 * NativeInstruction::instruction_size; ++} + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++} + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++ // Creation also verifies the object. 
++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++#ifndef PRODUCT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + -+ BLOCK_COMMENT("string_indexof {"); ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ method_holder->set_data(0); ++} + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ _call->verify_alignment(); + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. 
-+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +new file mode 100644 +index 0000000000..05da242e35 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. 
We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++// Inline functions for memory copy and fill. + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + -+ sub(sp, sp, ASIZE); ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +new file mode 100644 +index 0000000000..e9ff307b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++// Nothing to do on riscv + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 0000000000..06bca5298c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++static int pd_instruction_alignment() { ++ return 1; ++} + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++static const char* pd_cpu_opts() { ++ return ""; ++} + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +new file mode 100644 +index 0000000000..d4fcbdcbbd +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,694 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" ++#endif + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); ++// Profiling/safepoint support + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ if (!sp_safe) { ++ return false; ++ } + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); + -+ Register first = tmp3; ++ if (!unextended_sp_safe) { ++ return false; ++ } + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 
4 : 2); -+ bltz(t0, DOSHORT); ++ // We know sp/unextended_sp are safe only fp is questionable here + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++ if (_cb != NULL) { + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); + } -+ } + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); -+ } ++ if (!saved_fp_safe) { ++ return false; ++ } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); ++ return sender.is_interpreted_frame_valid(thread); ++ } + -+ bind(DONE); -+} ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } + -+// Compare strings. 
-+void C2_MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ if (!saved_fp_safe) { ++ return false; ++ } + -+ BLOCK_COMMENT("string_compare {"); ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); + -+ // Compute the minimum of the string lengths and save the difference in result. 
-+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ return jcw_safe; ++ } + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; + } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); + } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; + } -+ bgez(cnt2, TAIL); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. 
-+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ if (!sender_blob->is_compiled()) { ++ return false; + } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); + -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; + } + -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ if (!fp_safe) { ++ return false; + } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); + -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} string_compare"); -+} -+ -+void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); -+ -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same -+ -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); -+ -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); -+ -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); -+ -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); -+ -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); -+ -+ bind(SAME); -+ mv(result, true); -+ // That's it. 
-+ bind(DONE); ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } + -+ BLOCK_COMMENT("} array_equals"); ++ return true; +} + -+// Compare Strings -+ -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. -+ -+void C2_MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); -+ -+ BLOCK_COMMENT("string_equals {"); -+ -+ mv(result, false); -+ -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); -+ -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); -+ -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ -+ bind(SHORT); -+ Label TAIL03, TAIL01; -+ -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); -+ } -+ -+ bind(TAIL03); -+ // 0-3 bytes left. -+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } -+ -+ bind(TAIL01); -+ if (elem_size == 1) { // Only needed when comparing 1-byte elements -+ // 0-1 bytes left. -+ andi(t0, cnt1, 1); -+ beqz(t0, SAME); -+ { -+ lbu(tmp1, a1, 0); -+ lbu(tmp2, a2, 0); -+ bne(tmp1, tmp2, DONE); -+ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; + } ++} + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} + -+ // That's it. 
-+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); +} + -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} + -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; ++// monitor elements + -+void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. 
Finally, commute the result bits at function do_one_bytecode(). -+void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; +} + -+void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + -+void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; +} + -+void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. ++ jfa->capture_last_Java_pc(); ++ } ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. 
++#ifdef ASSERT
++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
++ frame fr;
+
-+ Label Done;
-+ fsflags(zr);
-+ if (is_double) {
-+ is_min ? fmin_d(dst, src1, src2)
-+ : fmax_d(dst, src1, src2);
-+ // Checking NaNs
-+ flt_d(zr, src1, src2);
-+ } else {
-+ is_min ? fmin_s(dst, src1, src2)
-+ : fmax_s(dst, src1, src2);
-+ // Checking NaNs
-+ flt_s(zr, src1, src2);
-+ }
++ // This is ugly but it's better than to change {get,set}_original_pc
++ // to take an SP value as argument. And it's only a debugging
++ // method anyway.
++ fr._unextended_sp = unextended_sp;
+
-+ frflags(t0);
-+ beqz(t0, Done);
++ assert_cond(nm != NULL);
++ address original_pc = nm->get_original_pc(&fr);
++ assert(nm->insts_contains_inclusive(original_pc),
++ "original PC must be in the main code section of the compiled method (or must be immediately following it)");
++}
++#endif
+
-+ // In case of NaNs
-+ is_double ? fadd_d(dst, src1, src2)
-+ : fadd_s(dst, src1, src2);
++//------------------------------------------------------------------------------
++// frame::adjust_unextended_sp
++void frame::adjust_unextended_sp() {
++ // On riscv, sites calling method handle intrinsics and lambda forms are treated
++ // as any other call site. Therefore, no special action is needed when we are
++ // returning to any of these call sites.
+
-+ bind(Done);
++ if (_cb != NULL) {
++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null();
++ if (sender_cm != NULL) {
++ // If the sender PC is a deoptimization point, get the original PC.
++ if (sender_cm->is_deopt_entry(_pc) ||
++ sender_cm->is_deopt_mh_entry(_pc)) {
++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
++ }
++ }
++ }
+}
+
-+void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
-+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) {
-+ Label loop;
-+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16;
-+
-+ bind(loop);
-+ vsetvli(tmp1, cnt, sew, Assembler::m2);
-+ vlex_v(vr1, a1, sew);
-+ vlex_v(vr2, a2, sew);
-+ vmsne_vv(vrs, vr1, vr2);
-+ vfirst_m(tmp2, vrs);
-+ bgez(tmp2, DONE);
-+ sub(cnt, cnt, tmp1);
-+ if (!islatin) {
-+ slli(tmp1, tmp1, 1); // get byte counts
-+ }
-+ add(a1, a1, tmp1);
-+ add(a2, a2, tmp1);
-+ bnez(cnt, loop);
++//------------------------------------------------------------------------------
++// frame::update_map_with_saved_link
++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
++ // The interpreter and compiler(s) always save fp in a known
++ // location on entry. We must record where that location is
++ // so that if fp was live on callout from c2 we can find
++ // the saved copy no matter what it called.
+
-+ mv(result, true);
++ // Since the interpreter always saves fp, if we record where it is then
++ // we don't have to always save fp on entry and exit to c2 compiled
++ // code; on entry will be enough.
++ assert(map != NULL, "map must be set");
++ map->set_location(::fp->as_VMReg(), (address) link_addr);
++ // This is weird: "H" ought to be at a higher address, however the
++ // oopMaps seem to have the "H" regs at the same address as the
++ // vanilla register.
++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); +} + -+void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; + -+ BLOCK_COMMENT("string_equals_v {"); ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); + -+ mv(result, false); ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } ++#endif // COMPILER2 + -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} -+ -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. -+void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; -+ -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); -+ -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + -+void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ BLOCK_COMMENT("arrays_equals_v {"); ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. 
so we ++ // have to find it relative to the unextended sp + -+ // if (a1 == a2), return true -+ mv(result, true); -+ beq(a1, a2, DONE); ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = l_sender_sp; + -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); ++ // the return_address is always the word on the stack ++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); + -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); ++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); + -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } + -+ bind(DONE); ++ // Since the prolog does the save and restore of FP there is no ++ // oopmap for it so we must fill in its location as if there was ++ // an oopmap entry since if our caller was compiled code there ++ // could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } + -+ BLOCK_COMMENT("} arrays_equals_v"); ++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + -+void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; ++//------------------------------------------------------------------------------ ++// frame::sender ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ assert(map != NULL, "map must be set"); ++ map->set_include_argument_oops(false); + -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; ++ if (is_entry_frame()) { ++ return sender_for_entry_frame(map); ++ } ++ if (is_interpreted_frame()) { ++ return sender_for_interpreter_frame(map); ++ } ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); -+ -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? 
v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); + } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); + -+ bind(DONE); ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); +} + -+void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+} ++ // do some validation of frame elements + -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); -+} ++ // first the method ++ Method* m = *interpreter_frame_method_addr(); ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } + -+// result: the number of elements had been encoded. 
-+void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } + -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } + -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base()) { ++ return false; ++ } + -+ bind(DIFFERENCE); -+ add(result, result, tmp); ++ if (m->max_locals() > 0 && locals < (address) fp()) { ++ // fp in interpreter frame on RISC-V is higher than that on AArch64, ++ // pointing to sender_sp and sender_sp-2 relatively. ++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, ++ // pointing to sender_sp-1 (with one padding slot). ++ // So we verify the 'locals' pointer only if max_locals > 0. ++ return false; ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); ++ // We'd have to be pretty unlucky to be mislead at this point ++ return true; +} + -+void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -+ Label LOOP, SET_RESULT, DONE; -+ -+ BLOCK_COMMENT("count_positives_v {"); -+ mv(result, zr); ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); + -+ bind(LOOP); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, SET_RESULT); -+ // if tmp == -1, all bytes are positive -+ add(result, result, t0); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, LOOP); -+ j(DONE); ++ intptr_t* tos_addr = NULL; ++ if (method->is_native()) { ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. ++ tos_addr += 2 * Interpreter::stackElementWords; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } + -+ // add remaining positive bytes count -+ bind(SET_RESULT); -+ add(result, result, tmp); ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : { ++ value_result->f = *(jfloat*)tos_addr; ++ break; ++ } ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} count_positives_v"); ++ return type; +} + -+void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL) { -+ mv(result, zr); -+ -+ Label loop, MATCH, DONE; -+ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; -+ bind(loop); -+ vsetvli(tmp1, cnt1, sew, Assembler::m4); -+ vlex_v(v0, str1, sew); -+ vmseq_vx(v0, v0, ch); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, MATCH); // if equal, return index -+ -+ add(result, result, tmp1); -+ sub(cnt1, cnt1, tmp1); -+ if (!isL) slli(tmp1, tmp1, 1); -+ add(str1, str1, tmp1); -+ bnez(cnt1, loop); -+ -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ add(result, result, tmp2); + -+ bind(DONE); ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; +} + -+// Set dst to NaN if any NaN input. -+void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); ++#ifndef PRODUCT + -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) + -+ is_min ? vfmin_vv(dst, src1, src2) -+ : vfmax_vv(dst, src1, src2); ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif + -+ vmfne_vv(v0, src1, src1); -+ vfadd_vv(dst, src1, src1, Assembler::v0_t); -+ vmfne_vv(v0, src2, src2); -+ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; +} + -+// Set dst to NaN if any NaN input. 
-+void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -+ FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min) { -+ assert_different_registers(src2, tmp1, tmp2); ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} + -+ Label L_done, L_NaN; -+ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -+ vfmv_s_f(tmp2, src1); ++#undef DESCRIBE_FP_OFFSET + -+ is_min ? vfredmin_vs(tmp1, src2, tmp2) -+ : vfredmax_vs(tmp1, src2, tmp2); ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); ++} + -+ fsflags(zr); -+ // Checking NaNs -+ vmflt_vf(tmp2, src2, src1); -+ frflags(t0); -+ bnez(t0, L_NaN); -+ j(L_done); ++void frame::pd_ps() {} ++#endif + -+ bind(L_NaN); -+ vfmv_s_f(tmp2, src1); -+ vfredsum_vs(tmp1, src2, tmp2); ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? ++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); ++} + -+ bind(L_done); -+ vfmv_f_s(dst, tmp1); ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; +} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 00000000000..c71df4c101b +index 0000000000..18e021dcb9 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -0,0 +1,193 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,199 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -13371,182 +12788,189 @@ index 00000000000..c71df4c101b + * + */ + -+#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -+ -+// C2_MacroAssembler contains high-level macros for C2 -+ -+ private: -+ void element_compare(Register r1, Register r2, -+ Register result, Register cnt, -+ Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, -+ VectorRegister vrs, -+ bool is_latin, Label& DONE); -+ public: ++#ifndef CPU_RISCV_FRAME_RISCV_HPP ++#define CPU_RISCV_FRAME_RISCV_HPP + -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, -+ int ae); ++#include "runtime/synchronizer.hpp" + -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp + -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); ++// [monitors[0] ] \ ++// ... | monitor block size = k ++// [monitors[k-1] ] / ++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset ++// [byte code index/pointr] = bcx() bcx_offset + -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset + -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); ++// [klass of method ] = mirror() mirror_offset ++// [padding ] + -+ void arrays_equals(Register r1, Register r2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, Register cnt1, -+ int elem_size); ++// [methodData ] = mdp() mdx_offset ++// [Method ] = method() method_offset + -+ void string_equals(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++// [last esp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset + -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; ++// [old frame pointer ] ++// [return pc ] + -+ // cmp -+ void cmp_branch(int cmpFlag, -+ Register op1, Register op2, -+ Label& label, bool is_far = false); ++// [last sp ] <- fp = link() ++// [oop temp ] (only for native calls) + -+ void float_cmp_branch(int cmpFlag, -+ FloatRegister op1, FloatRegister op2, -+ Label& label, bool is_far = false); ++// [padding ] (to preserve machine SP alignment) ++// [locals and parameters ] ++// <- sender sp 
++// ------------------------------ Asm interpreter ---------------------------------------- + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... | | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ + -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only + -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); -+ } ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); -+ } ++ // Entry frames ++ // n.b. these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 34, ++ entry_frame_call_wrapper_offset = -10, + -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); -+ } ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; + -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); + } + -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? 
ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; + } + -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. + -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); -+ } ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); + -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); + } + -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif + -+ // intrinsic methods implemented by rvv instructions -+ void string_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ public: ++ // Constructors + -+ void arrays_equals_v(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void string_compare_v(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register result, -+ Register tmp1, Register tmp2, -+ int encForm); ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); + -+ void clear_array_v(Register base, Register cnt); ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); + -+ void byte_array_inflate_v(Register src, Register dst, -+ Register len, Register tmp); ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); + -+ void char_array_compress_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } + -+ void encode_iso_array_v(Register src, Register dst, -+ Register len, Register result, -+ Register tmp); ++ inline address* sender_pc_addr() const; + -+ void count_positives_v(Register ary, Register len, -+ Register result, Register tmp); ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; + -+ void string_indexof_char_v(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ bool isL); ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + -+ void minmax_FD_v(VectorRegister dst, -+ VectorRegister src1, VectorRegister src2, -+ bool is_double, bool is_min); ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + -+ void reduce_minmax_FD_v(FloatRegister dst, -+ 
FloatRegister src1, VectorRegister src2, -+ VectorRegister tmp1, VectorRegister tmp2, -+ bool is_double, bool is_min); ++ static jint interpreter_frame_expression_stack_direction() { return -1; } + -+#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 00000000000..53a41665f4b +index 0000000000..abd5bda7e4 --- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,83 @@ ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,245 @@ +/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13570,509 +12994,233 @@ index 00000000000..53a41665f4b + * + */ + -+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP -+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" + -+// Sets the default values for platform dependent flags used by the server compiler. -+// (see c2_globals.hpp). Alpha-sorted. ++// Inline functions for RISCV frames: + -+define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, CICompileOSR, true); -+define_pd_global(bool, InlineIntrinsics, true); -+define_pd_global(bool, PreferInterpreterNativeStubs, false); -+define_pd_global(bool, ProfileTraps, true); -+define_pd_global(bool, UseOnStackReplacement, true); -+define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(intx, CompileThreshold, 10000); ++// Constructors: + -+define_pd_global(intx, OnStackReplacePercentage, 140); -+define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FreqInlineSize, 325); -+define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, InteriorEntryAlignment, 16); -+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); -+define_pd_global(intx, LoopUnrollLimit, 60); -+define_pd_global(intx, LoopPercentProfileLimit, 10); -+// InitialCodeCacheSize derived from specjbb2000 run. -+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize -+define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} + -+// Ergonomics related flags -+define_pd_global(uint64_t,MaxRAM, 128ULL*G); -+define_pd_global(intx, RegisterCostAreaRatio, 16000); ++static int spin; + -+// Peephole and CISC spilling both break the graph, and so makes the -+// scheduler sick. 
-+define_pd_global(bool, OptoPeephole, false); -+define_pd_global(bool, UseCISCSpill, false); -+define_pd_global(bool, OptoScheduling, true); -+define_pd_global(bool, OptoBundling, false); -+define_pd_global(bool, OptoRegScheduling, false); -+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); -+define_pd_global(bool, IdealizeClearArrayNode, true); ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+define_pd_global(intx, ReservedCodeCacheSize, 48*M); -+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); -+define_pd_global(intx, ProfiledCodeHeapSize, 22*M); -+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 6); -+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+ -+// Ergonomics related flags -+define_pd_global(bool, NeverActAsServerClassMachine, false); -+ -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. -+ -+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -new file mode 100644 -index 00000000000..cdbd69807be ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp -@@ -0,0 +1,38 @@ -+/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "opto/compile.hpp" -+#include "opto/node.hpp" -+ -+// processor dependent initialization for riscv -+ -+extern void reg_mask_init(); -+ -+void Compile::pd_compiler2_init() { -+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); -+ reg_mask_init(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } +} -diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -new file mode 100644 -index 00000000000..a90d9fdc160 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -@@ -0,0 +1,47 @@ -+/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "opto/compile.hpp" -+#include "opto/node.hpp" -+#include "opto/output.hpp" -+#include "runtime/sharedRuntime.hpp" + -+#define __ masm. -+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -+ "polling page return stub not created yet"); -+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -+ RuntimeAddress callback_addr(stub); -+ -+ __ bind(entry->_stub_label); -+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -+ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -+ __ la(t0, safepoint_pc.target()); -+ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ far_jump(callback_addr); ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); +} -+#undef __ -diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -new file mode 100644 -index 00000000000..14a68b45026 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP -+#define CPU_RISCV_CODEBUFFER_RISCV_HPP -+ -+private: -+ void pd_initialize() {} -+ -+public: -+ void flush_bundle(bool start_new_bundle) {} -+ -+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -new file mode 100644 -index 00000000000..75bc4be7840 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/compiledIC.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nmethod.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/safepoint.hpp" + -+// ---------------------------------------------------------------------------- -+ -+#define __ _masm. -+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { -+ precond(cbuf.stubs()->start() != badAddress); -+ precond(cbuf.stubs()->end() != badAddress); -+ // Stub is fixed up when the corresponding call is converted from -+ // calling compiled code to calling interpreted code. -+ // mv xmethod, 0 -+ // jalr -4 # to self -+ -+ if (mark == NULL) { -+ mark = cbuf.insts_mark(); // Get mark within main instrs section. 
-+ } -+ -+ // Note that the code buffer's insts_mark is always relative to insts. -+ // That's why we must use the macroassembler to generate a stub. -+ MacroAssembler _masm(&cbuf); ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); + -+ address base = __ start_a_stub(to_interp_stub_size()); -+ int offset = __ offset(); -+ if (base == NULL) { -+ return NULL; // CodeBuffer::expand failed ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; + } -+ // static stub relocation stores the instruction address of the call -+ __ relocate(static_stub_Relocation::spec(mark)); -+ -+ __ emit_static_call_stub(); -+ -+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); -+ __ end_a_stub(); -+ return base; -+} -+#undef __ -+ -+int CompiledStaticCall::to_interp_stub_size() { -+ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; +} + -+int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count 4 instructions here (although -+ // there are only 3) because we sometimes emit an alignment nop. -+ // Trampoline stubs are always word aligned. -+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; -+} ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = (address)(ptr_sp[-1]); + -+// Relocation entries for call stub, compiled java to interpreter. -+int CompiledStaticCall::reloc_to_interp_stub() { -+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call -+} ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. 
+ -+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(); -+ guarantee(stub != NULL, "stub not found"); ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); + -+ if (TraceICs) { -+ ResourceMark rm; -+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", -+ p2i(instruction_address()), -+ callee->name_and_sig_as_C_string()); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; + } -+ -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+#ifdef ASSERT -+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); -+ -+ verify_mt_safe(callee, entry, method_holder, jump); -+#endif -+ // Update stub. -+ method_holder->set_data((intptr_t)callee()); -+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); -+ ICache::invalidate_range(stub, to_interp_stub_size()); -+ // Update jump to call. -+ set_destination_mt_safe(stub); +} + -+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ // Reset stub. -+ address stub = static_stub->addr(); -+ assert(stub != NULL, "stub not found"); -+ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ method_holder->set_data(0); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -+ jump->set_jump_destination((address)-1); ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; +} + -+//----------------------------------------------------------------------------- -+// Non-product mode code -+#ifndef PRODUCT ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. ++inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+void CompiledDirectStaticCall::verify() { -+ // Verify call. -+ _call->verify(); -+ _call->verify_alignment(); ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } + -+ // Verify stub. -+ address stub = find_stub(); -+ assert(stub != NULL, "no stub found for static call"); -+ // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } + -+ // Verify state. 
-+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; +} + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -new file mode 100644 -index 00000000000..bceadcc5dcc ---- /dev/null -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,136 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_COPY_RISCV_HPP -+#define CPU_RISCV_COPY_RISCV_HPP ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + -+#include OS_CPU_HEADER(copy) ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + -+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { -+ julong* to = (julong*) tohw; -+ julong v = ((julong) value << 32) | value; -+ while (count-- > 0) { -+ *to++ = v; -+ } ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + -+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { -+ pd_fill_to_words(tohw, count, value); ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + -+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { -+ (void)memset(to, value, count); ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); +} + -+static void pd_zero_to_words(HeapWord* tohw, size_t count) { -+ pd_fill_to_words(tohw, count, 0); ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); +} + -+static void pd_zero_to_bytes(void* to, size_t count) { -+ (void)memset(to, 0, count); -+} + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); -+} ++// Constant pool cache + -+static void pd_disjoint_words(const 
HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; -+ } ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ shared_disjoint_words_atomic(from, to, count); -+} ++// Method + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); +} + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++// Mirror + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); +} + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } +} + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); +} + -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; +} + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++ ++// Compiled frames ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; +} + -+#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..b0e5560c906 +index 0000000000..c5ccf040c7 --- /dev/null -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,475 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -14096,46 +13244,464 @@ index 00000000000..b0e5560c906 + * + */ + -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif + -+static int pd_instruction_alignment() { -+ return 1; -+} ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); ++ ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! ++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); ++ ++ __ bind(done); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
++ ++ assert(thread == xthread, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime ++ ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ ++ __ push_call_clobbered_registers(); ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ __ pop_call_clobbered_registers(); ++ ++ __ bind(done); + -+static const char* pd_cpu_opts() { -+ return ""; +} + -+// Returns address of n-th instruction preceding addr, -+// NULL if no preceding instruction can be found. -+// On riscv, we assume a constant instruction length. -+// It might be beneficial to check "is_readable" as we do on ppc and s390. -+static address find_prev_instr(address addr, int n_instr) { -+ return addr - Assembler::instruction_size * n_instr; ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); ++ ++ // crosses regions, storing NULL? ++ ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? 
++ ++ ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ const Register card_addr = tmp; ++ ++ __ srli(card_addr, store_addr, CardTable::card_shift); ++ ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ ++ __ sb(zr, Address(card_addr)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr, new_val); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); +} + -+// special-case instruction decoding. -+// There may be cases where the binutils disassembler doesn't do -+// the perfect job. In those cases, decode_instruction0 may kick in -+// and do it right. -+// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" -+static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -+ return here; ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = is_reference_type(type); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. 
++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } +} + -+// platform-specific instruction annotations (like value of loaded constants) -+static void annotate(address pc, outputStream* st) {} ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } ++ } else { ++ __ la(x13, dst); ++ } ++ ++ g1_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); ++ g1_write_barrier_post(masm, ++ x13 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition"); ++ assert(stub->new_val()->is_register(), "Precondition"); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ // arg0 : previous value of memory ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0 : store_address ++ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. 
++ // Must check to see if card is already dirty ++ const Register thread = xthread; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; ++ ++ assert_different_registers(card_offset, byte_map_base, t0); ++ ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); ++ ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ sb(zr, Address(card_addr, 0)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; ++ ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 00000000000..5c700be9c91 +index 0000000000..37bc183f39 --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -14159,34 +13725,68 @@ index 00000000000..5c700be9c91 + * + */ + -+#include "precompiled.hpp" -+#include "prims/foreign_globals.hpp" -+#include "utilities/debug.hpp" ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP + -+// Stubbed out, implement later -+const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -+ Unimplemented(); -+ return {}; -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" + -+const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -+ Unimplemented(); -+ return {}; -+} ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; + -+const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -+ ShouldNotCallThis(); -+ return {}; -+} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp new file mode 100644 -index 00000000000..3ac89752c27 +index 0000000000..8735fd014f --- /dev/null -+++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ +/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14209,22 +13809,20 @@ index 00000000000..3ac89752c27 + * + */ + -+#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -+#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP + -+class ABIDescriptor {}; -+class BufferLayout {}; ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; + -+#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..6e38960598a +index 0000000000..2b439280fa --- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,697 @@ ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,225 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -14249,685 +13847,532 @@ index 00000000000..6e38960598a + */ + +#include "precompiled.hpp" -+#include "compiler/oopMap.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "memory/universe.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/monitorChunk.hpp" -+#include "runtime/os.inline.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stackWatermarkSet.hpp" -+#include "runtime/stubCodeGenerator.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#include "runtime/vframeArray.hpp" -+#endif -+ -+#ifdef ASSERT -+void RegisterMap::check_location_valid() { -+} -+#endif ++#include "runtime/thread.hpp" + ++#define __ masm-> + -+// Profiling/safepoint support ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. 
+ -+bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; -+ address unextended_sp = (address)_unextended_sp; ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} + -+ // consider stack guards when trying to determine "safe" stack pointers -+ // sp must be within the usable part of the stack (not in guards) -+ if (!thread->is_in_usable_stack(addr_sp)) { -+ return false; ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); + } + -+ // When we are running interpreted code the machine stack pointer, SP, is -+ // set low enough so that the Java expression stack can grow and shrink -+ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++} + -+ // When we call out of an interpreted method, SP is incremented so that -+ // the space between SP and ESP is removed. The SP saved in the callee's -+ // frame is the SP *before* this increment. So, when we walk a stack of -+ // interpreter frames the sender's SP saved in a frame might be less than -+ // the SP at the point of call. 
++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} + -+ // So unextended sp must be within the stack but we need not to check -+ // that unextended sp >= sp ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; + -+ if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ return false; ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); + } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); + -+ // an fp must be within the stack and above (but not equal) sp -+ // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -+ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ -+ // We know sp/unextended_sp are safe only fp is questionable here -+ -+ // If the current frame is known to the code cache then we can attempt to -+ // to construct the sender and do some validation of it. This goes a long way -+ // toward eliminating issues when we get in frame construction code -+ -+ if (_cb != NULL) { ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); + -+ // First check if frame is complete and tester is reliable -+ // Unfortunately we can only check frame complete for runtime stubs and nmethod -+ // other generic buffer blobs are more problematic so we just assume they are -+ // ok. adapter blobs never have a frame complete and are never ok. ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} + -+ if (!_cb->is_frame_complete_at(_pc)) { -+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { -+ return false; -+ } -+ } ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_cond(masm != NULL); ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); + -+ // Could just be some random pointer within the codeBlob -+ if (!_cb->code_contains(_pc)) { -+ return false; ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); + } + -+ // Entry frame checks -+ if (is_entry_frame()) { -+ // an entry frame must have a valid fp. 
-+ return fp_safe && is_entry_frame_valid(thread); ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); + } + -+ intptr_t* sender_sp = NULL; -+ intptr_t* sender_unextended_sp = NULL; -+ address sender_pc = NULL; -+ intptr_t* saved_fp = NULL; -+ -+ if (is_interpreted_frame()) { -+ // fp must be safe -+ if (!fp_safe) { -+ return false; -+ } -+ -+ sender_pc = (address)this->fp()[return_addr_offset]; -+ // for interpreted frames, the value below is the sender "raw" sp, -+ // which can be different from the sender unextended sp (the sp seen -+ // by the sender) because of current frame local variables -+ sender_sp = (intptr_t*) addr_at(sender_sp_offset); -+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; -+ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); + } else { -+ // must be some sort of compiled/runtime frame -+ // fp does not have to be safe (although it could be check for c1?) -+ -+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc -+ if (_cb->frame_size() <= 0) { -+ return false; -+ } -+ -+ sender_sp = _unextended_sp + _cb->frame_size(); -+ // Is sender_sp safe? -+ if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ return false; -+ } -+ -+ sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp - 1); -+ saved_fp = (intptr_t*) *(sender_sp - 2); ++ __ add(end, obj, var_size_in_bytes); + } + ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); + -+ // If the potential sender is the interpreter then we can do some more checking -+ if (Interpreter::contains(sender_pc)) { ++ __ bgtu(end, t1, slow_case, is_far); + -+ // fp is always saved in a recognizable place in any code we generate. However -+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp -+ // is really a frame pointer. -+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++ // If heap_top hasn't been changed by some other thread, update it. 
++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); ++ } ++} + -+ return sender.is_interpreted_frame_valid(thread); -+ } ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert(tmp1->is_valid(), "need temp reg"); + -+ // We must always be able to find a recognizable pc -+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); -+ if (sender_pc == NULL || sender_blob == NULL) { -+ return false; -+ } ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..984d94f4c3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,76 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Could be a zombie method -+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { -+ return false; -+ } ++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + -+ // Could just be some random pointer within the codeBlob -+ if (!sender_blob->code_contains(sender_pc)) { -+ return false; -+ } ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" + -+ // We should never be able to see an adapter if the current frame is something from code cache -+ if (sender_blob->is_adapter_blob()) { -+ return false; -+ } ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); + -+ // Could be the call_stub -+ if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ return false; -+ } ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register end, Register tmp, RegSet saved_regs) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); + -+ // construct the potential sender -+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); + -+ // Validate the JavaCallWrapper an entry frame must have -+ address jcw = (address)sender.entry_frame_call_wrapper(); ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ void eden_allocate(MacroAssembler* masm, ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ virtual void barrier_stubs_init() {} + -+ return jcw_safe; -+ } ++ virtual ~BarrierSetAssembler() {} ++}; + -+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); -+ if (nm != NULL) { -+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || -+ nm->method()->is_method_handle_intrinsic()) { -+ return false; -+ } -+ } ++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..671cad68b2 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,122 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size -+ // because the return address counts against the callee's frame. -+ if (sender_blob->frame_size() <= 0) { -+ assert(!sender_blob->is_compiled(), "should count return address at least"); -+ return false; -+ } ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "interpreter/interp_masm.hpp" + -+ // We should never be able to see anything here except an nmethod. If something in the -+ // code cache (current frame) is called by an entity within the code cache that entity -+ // should not be anything but the call stub (already covered), the interpreter (already covered) -+ // or an nmethod. -+ if (!sender_blob->is_compiled()) { -+ return false; -+ } ++#define __ masm-> + -+ // Could put some more validation for the potential non-interpreted sender -+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + -+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_different_registers(obj, tmp); ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ // We've validated the potential sender that would be created -+ return true; -+ } ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + -+ // Must be native-compiled frame. 
Since sender will try and use fp to find -+ // linkages it must be safe -+ if (!fp_safe) { -+ return false; -+ } ++ __ srli(obj, obj, CardTable::card_shift); + -+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) -+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ assert(CardTable::dirty_card_val() == 0, "must be"); + -+ return true; -+} ++ __ load_byte_map_base(tmp); ++ __ add(tmp, obj, tmp); + -+void frame::patch_pc(Thread* thread, address pc) { -+ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); -+ address* pc_addr = &(((address*) sp())[-1]); -+ if (TracePcPatching) { -+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -+ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); -+ } -+ // Either the return address is the original one or we are going to -+ // patch in the same address that's already there. -+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); -+ *pc_addr = pc; -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ assert(original_pc == _pc, "expected original PC to be stored before patching"); -+ _deopt_state = is_deoptimized; -+ // leave _pc as is ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t1, Address(tmp)); ++ __ beqz(t1, L_already_dirty); ++ __ sb(zr, Address(tmp)); ++ __ bind(L_already_dirty); + } else { -+ _deopt_state = not_deoptimized; -+ _pc = pc; ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } ++ __ sb(zr, Address(tmp)); + } +} + -+bool frame::is_interpreted_frame() const { -+ return Interpreter::contains(pc()); -+} ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_different_registers(start, tmp); ++ assert_different_registers(count, tmp); + -+int frame::frame_size(RegisterMap* map) const { -+ frame sender = this->sender(map); -+ return sender.sp() - sp(); -+} ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); + -+intptr_t* frame::entry_frame_argument_at(int offset) const { -+ // convert offset to index to deal with tsi -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ // Entry frame's arguments are always in relation to unextended_sp() -+ return &unextended_sp()[index]; -+} ++ Label L_loop, L_done; ++ const Register end = count; + -+// sender_sp -+intptr_t* frame::interpreter_frame_sender_sp() const { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ return (intptr_t*) at(interpreter_frame_sender_sp_offset); -+} -+ -+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); -+} -+ -+ -+// monitor elements -+ -+BasicObjectLock* frame::interpreter_frame_monitor_begin() const { -+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); -+} -+ -+BasicObjectLock* frame::interpreter_frame_monitor_end() const { -+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); -+ // make sure the pointer points inside the frame -+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); -+ assert((intptr_t*) result < fp(), "monitor end 
should be strictly below the frame pointer"); -+ return result; -+} -+ -+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { -+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; -+} ++ __ beqz(count, L_done); // zero count - nothing to do ++ // end = start + count << LogBytesPerHeapOop ++ __ shadd(end, count, start, count, LogBytesPerHeapOop); ++ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + -+// Used by template based interpreter deoptimization -+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { -+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; -+} ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); ++ __ sub(count, end, start); // number of bytes to copy + -+frame frame::sender_for_entry_frame(RegisterMap* map) const { -+ assert(map != NULL, "map must be set"); -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); -+ assert(!entry_frame_is_first(), "next Java fp must be non zero"); -+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); -+ // Since we are walking the stack now this nested anchor is obviously walkable -+ // even if it wasn't when it was stacked. -+ if (!jfa->walkable()) { -+ // Capture _last_Java_pc (if needed) and mark anchor walkable. -+ jfa->capture_last_Java_pc(); ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); + } -+ map->clear(); -+ assert(map->include_argument_oops(), "should be set by clear"); -+ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); -+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); -+ return fr; -+} -+ -+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -+ ShouldNotCallThis(); -+ return nullptr; -+} -+ -+bool frame::optimized_entry_frame_is_first() const { -+ ShouldNotCallThis(); -+ return false; -+} -+ -+frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -+ ShouldNotCallThis(); -+ return {}; -+} -+ -+//------------------------------------------------------------------------------ -+// frame::verify_deopt_original_pc -+// -+// Verifies the calculated original PC of a deoptimization PC for the -+// given unextended SP. -+#ifdef ASSERT -+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { -+ frame fr; -+ -+ // This is ugly but it's better than to change {get,set}_original_pc -+ // to take an SP value as argument. And it's only a debugging -+ // method anyway. -+ fr._unextended_sp = unextended_sp; + -+ assert_cond(nm != NULL); -+ address original_pc = nm->get_original_pc(&fr); -+ assert(nm->insts_contains_inclusive(original_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); +} -+#endif + -+//------------------------------------------------------------------------------ -+// frame::adjust_unextended_sp -+void frame::adjust_unextended_sp() { -+ // On riscv, sites calling method handle intrinsics and lambda forms are treated -+ // as any other call site. 
Therefore, no special action is needed when we are -+ // returning to any of these call sites. ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; + -+ if (_cb != NULL) { -+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); -+ if (sender_cm != NULL) { -+ // If the sender PC is a deoptimization point, get the original PC. -+ if (sender_cm->is_deopt_entry(_pc) || -+ sender_cm->is_deopt_mh_entry(_pc)) { -+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); -+ } ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), x13); ++ } else { ++ __ la(x13, dst); ++ store_check(masm, x13, t0); + } + } +} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 0000000000..686fe8fa47 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+//------------------------------------------------------------------------------ -+// frame::update_map_with_saved_link -+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { -+ // The interpreter and compiler(s) always save fp in a known -+ // location on entry. We must record where that location is -+ // so that if fp was live on callout from c2 we can find -+ // the saved copy no matter what it called. -+ -+ // Since the interpreter always saves fp if we record where it is then -+ // we don't have to always save fp on entry and exit to c2 compiled -+ // code, on entry will be enough. 
-+ assert(map != NULL, "map must be set"); -+ map->set_location(::fp->as_VMReg(), (address) link_addr); -+ // this is weird "H" ought to be at a higher address however the -+ // oopMaps seems to have the "H" regs at the same address and the -+ // vanilla register. -+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); -+} -+ -+ -+//------------------------------------------------------------------------------ -+// frame::sender_for_interpreter_frame -+frame frame::sender_for_interpreter_frame(RegisterMap* map) const { -+ // SP is the raw SP from the sender after adapter or interpreter -+ // extension. -+ intptr_t* sender_sp = this->sender_sp(); -+ -+ // This is the sp before any possible extension (adapter/locals). -+ intptr_t* unextended_sp = interpreter_frame_sender_sp(); -+ -+#ifdef COMPILER2 -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); -+ } -+#endif // COMPILER2 -+ -+ return frame(sender_sp, unextended_sp, link(), sender_pc()); -+} -+ -+ -+//------------------------------------------------------------------------------ -+// frame::sender_for_compiled_frame -+frame frame::sender_for_compiled_frame(RegisterMap* map) const { -+ // we cannot rely upon the last fp having been saved to the thread -+ // in C2 code but it will have been pushed onto the stack. so we -+ // have to find it relative to the unextended sp -+ -+ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); -+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); -+ intptr_t* unextended_sp = l_sender_sp; -+ -+ // the return_address is always the word on the stack -+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); -+ -+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); -+ -+ assert(map != NULL, "map must be set"); -+ if (map->update_map()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); -+ if (_cb->oop_maps() != NULL) { -+ OopMapSet::update_register_map(this, map); -+ } -+ -+ // Since the prolog does the save and restore of FP there is no -+ // oopmap for it so we must fill in its location as if there was -+ // an oopmap entry since if our caller was compiled code there -+ // could be live jvm state in it. -+ update_map_with_saved_link(map, saved_fp_addr); -+ } ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP + -+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+//------------------------------------------------------------------------------ -+// frame::sender_raw -+frame frame::sender_raw(RegisterMap* map) const { -+ // Default is we done have to follow them. 
The sender_for_xxx will -+ // update it accordingly -+ assert(map != NULL, "map must be set"); -+ map->set_include_argument_oops(false); ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+ if (is_entry_frame()) { -+ return sender_for_entry_frame(map); -+ } -+ if (is_interpreted_frame()) { -+ return sender_for_interpreter_frame(map); -+ } -+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+ // This test looks odd: why is it not is_compiled_frame() ? That's -+ // because stubs also have OOP maps. -+ if (_cb != NULL) { -+ return sender_for_compiled_frame(map); -+ } ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 0000000000..4b7982eb21 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. 
-+ return frame(sender_sp(), link(), sender_pc()); -+} ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+frame frame::sender(RegisterMap* map) const { -+ frame result = sender_raw(map); ++#define __ masm-> + -+ if (map->process_frames()) { -+ StackWatermarkSet::on_iteration(map->thread(), result); ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } -+ -+ return result; +} + -+bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -+ assert(is_interpreted_frame(), "Not an interpreted frame"); -+ // These are reasonable sanity checks -+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { -+ return false; -+ } -+ if (fp() + interpreter_frame_initial_sp_offset < sp()) { -+ return false; -+ } -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above -+ return false; -+ } -+ -+ // do some validation of frame elements -+ -+ // first the method -+ Method* m = *interpreter_frame_method_addr(); -+ // validate the method we'd find in this potential sender -+ if (!Method::is_valid_method(m)) { -+ return false; -+ } -+ -+ // stack frames shouldn't be much larger than max_stack elements -+ // this test requires the use of unextended_sp which is the sp as seen by -+ // the current frame, and not sp which is the "raw" pc which could point -+ // further because of local variables of the callee method inserted after -+ // method arguments -+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { -+ return false; -+ } -+ -+ // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); -+ if (m->validate_bci_from_bcp(bcp) < 0) { -+ return false; -+ } -+ -+ // validate constantPoolCache* -+ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); -+ if (MetaspaceObj::is_valid(cp) == false) { -+ return false; -+ } -+ -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); -+ if (locals > thread->stack_base() || locals < (address) fp()) { -+ return false; ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); + } -+ -+ // We'd have to be pretty unlucky to be mislead at this point -+ return true; +} + -+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { -+ assert(is_interpreted_frame(), "interpreted frame expected"); -+ Method* method = interpreter_frame_method(); -+ BasicType type = method->result_type(); -+ -+ intptr_t* tos_addr = NULL; -+ if (method->is_native()) { -+ tos_addr = (intptr_t*)sp(); -+ if (type == T_FLOAT || type == T_DOUBLE) { -+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. 
-+ tos_addr += 2 * Interpreter::stackElementWords; -+ } ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { -+ tos_addr = (intptr_t*)interpreter_frame_tos_address(); -+ } -+ -+ switch (type) { -+ case T_OBJECT : -+ case T_ARRAY : { -+ oop obj; -+ if (method->is_native()) { -+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); -+ } else { -+ oop* obj_p = (oop*)tos_addr; -+ obj = (obj_p == NULL) ? (oop)NULL : *obj_p; -+ } -+ assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ *oop_result = obj; -+ break; -+ } -+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; -+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; -+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; -+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; -+ case T_INT : value_result->i = *(jint*)tos_addr; break; -+ case T_LONG : value_result->j = *(jlong*)tos_addr; break; -+ case T_FLOAT : { -+ value_result->f = *(jfloat*)tos_addr; -+ break; -+ } -+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; -+ case T_VOID : /* Nothing to do */ break; -+ default : ShouldNotReachHere(); -+ } -+ -+ return type; -+} -+ -+ -+intptr_t* frame::interpreter_frame_tos_at(jint offset) const { -+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); -+ return &interpreter_frame_tos_address()[index]; -+} -+ -+#ifndef PRODUCT -+ -+#define DESCRIBE_FP_OFFSET(name) \ -+ values.describe(frame_no, fp() + frame::name##_offset, #name) -+ -+void frame::describe_pd(FrameValues& values, int frame_no) { -+ if (is_interpreted_frame()) { -+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_method); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); -+ DESCRIBE_FP_OFFSET(interpreter_frame_cache); -+ DESCRIBE_FP_OFFSET(interpreter_frame_locals); -+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); -+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); -+ } -+} -+#endif -+ -+intptr_t *frame::initial_deoptimization_info() { -+ // Not used on riscv, but we must return something. -+ return NULL; -+} -+ -+intptr_t* frame::real_fp() const { -+ if (_cb != NULL) { -+ // use the frame size if valid -+ int size = _cb->frame_size(); -+ if (size > 0) { -+ return unextended_sp() + size; -+ } ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } -+ // else rely on fp() -+ assert(!is_compiled_frame(), "unknown compiled frame size"); -+ return fp(); -+} -+ -+#undef DESCRIBE_FP_OFFSET -+ -+#ifndef PRODUCT -+// This is a generic constructor which is only used by pns() in debug.cpp. -+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { -+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); -+} -+ -+#endif -+ -+void JavaFrameAnchor::make_walkable(JavaThread* thread) { -+ // last frame set? -+ if (last_Java_sp() == NULL) { return; } -+ // already walkable? 
-+ if (walkable()) { return; } -+ vmassert(Thread::current() == (Thread*)thread, "not current thread"); -+ vmassert(last_Java_sp() != NULL, "not called from Java code?"); -+ vmassert(last_Java_pc() == NULL, "already walkable"); -+ capture_last_Java_pc(); -+ vmassert(walkable(), "something went wrong"); -+} -+ -+void JavaFrameAnchor::capture_last_Java_pc() { -+ vmassert(_last_Java_sp != NULL, "no last frame set"); -+ vmassert(_last_Java_pc == NULL, "already walkable"); -+ _last_Java_pc = (address)_last_Java_sp[-1]; +} -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp new file mode 100644 -index 00000000000..c06aaa9e391 +index 0000000000..00419c3163 --- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,202 @@ ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -14950,193 +14395,45 @@ index 00000000000..c06aaa9e391 + * + */ + -+#ifndef CPU_RISCV_FRAME_RISCV_HPP -+#define CPU_RISCV_FRAME_RISCV_HPP ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP + -+#include "runtime/synchronizer.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" + -+// A frame represents a physical stack frame (an activation). Frames can be -+// C or Java frames, and the Java frames can be interpreted or compiled. -+// In contrast, vframes represent source-level activations, so that one physical frame -+// can correspond to multiple source level frames because of inlining. -+// A frame is comprised of {pc, fp, sp} -+// ------------------------------ Asm interpreter ---------------------------------------- -+// Layout of asm interpreter frame: -+// [expression stack ] * <- sp ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. + -+// [monitors[0] ] \ -+// ... 
| monitor block size = k -+// [monitors[k-1] ] / -+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset -+// [byte code index/pointr] = bcx() bcx_offset -+ -+// [pointer to locals ] = locals() locals_offset -+// [constant pool cache ] = cache() cache_offset -+ -+// [klass of method ] = mirror() mirror_offset -+// [padding ] -+ -+// [methodData ] = mdp() mdx_offset -+// [Method ] = method() method_offset -+ -+// [last esp ] = last_sp() last_sp_offset -+// [old stack pointer ] (sender_sp) sender_sp_offset -+ -+// [old frame pointer ] -+// [return pc ] -+ -+// [last sp ] <- fp = link() -+// [oop temp ] (only for native calls) -+ -+// [padding ] (to preserve machine SP alignment) -+// [locals and parameters ] -+// <- sender sp -+// ------------------------------ Asm interpreter ---------------------------------------- -+ -+// ------------------------------ C Frame ------------------------------------------------ -+// Stack: gcc with -fno-omit-frame-pointer -+// . -+// . -+// +-> . -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// +-> | ... | | -+// | +-----------------+ | -+// | | return address | | -+// | | previous fp ------+ -+// | | saved registers | -+// | | local variables | -+// | | ... | <-+ -+// | +-----------------+ | -+// | | return address | | -+// +------ previous fp | | -+// | saved registers | | -+// | local variables | | -+// $fp --> | ... | | -+// +-----------------+ | -+// | return address | | -+// | previous fp ------+ -+// | saved registers | -+// $sp --> | local variables | -+// +-----------------+ -+// ------------------------------ C Frame ------------------------------------------------ -+ -+ public: -+ enum { -+ pc_return_offset = 0, -+ // All frames -+ link_offset = -2, -+ return_addr_offset = -1, -+ sender_sp_offset = 0, -+ // Interpreter frames -+ interpreter_frame_oop_temp_offset = 1, // for native calls only -+ -+ interpreter_frame_sender_sp_offset = -3, -+ // outgoing sp before a call to an invoked method -+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, -+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, -+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, -+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, -+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, -+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, -+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, -+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, -+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, -+ -+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, -+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, -+ -+ // Entry frames -+ // n.b. 
these values are determined by the layout defined in -+ // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 22, -+ entry_frame_call_wrapper_offset = -10, -+ -+ // we don't need a save area -+ arg_reg_save_area_bytes = 0 -+ }; -+ -+ intptr_t ptr_at(int offset) const { -+ return *ptr_at_addr(offset); -+ } -+ -+ void ptr_at_put(int offset, intptr_t value) { -+ *ptr_at_addr(offset) = value; -+ } -+ -+ private: -+ // an additional field beyond _sp and _pc: -+ intptr_t* _fp; // frame pointer -+ // The interpreter and adapters will extend the frame of the caller. -+ // Since oopMaps are based on the sp of the caller before extension -+ // we need to know that value. However in order to compute the address -+ // of the return address we need the real "raw" sp. Since sparc already -+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's -+ // original sp we use that convention. -+ -+ intptr_t* _unextended_sp; -+ void adjust_unextended_sp(); -+ -+ intptr_t* ptr_at_addr(int offset) const { -+ return (intptr_t*) addr_at(offset); -+ } -+ -+#ifdef ASSERT -+ // Used in frame::sender_for_{interpreter,compiled}_frame -+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); -+#endif -+ -+ public: -+ // Constructors -+ -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); -+ -+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); -+ -+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); -+ -+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); -+ -+ // accessors for the instance variables -+ // Note: not necessarily the real 'frame pointer' (see real_fp) -+ intptr_t* fp() const { return _fp; } -+ -+ inline address* sender_pc_addr() const; -+ -+ // expression stack tos if we are nested in a java call -+ intptr_t* interpreter_frame_last_sp() const; -+ -+ // helper to update a map with callee-saved RBP -+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); -+ -+ // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* last_sp); ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} + -+ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + -+ // returns the sending frame, without applying any barriers -+ frame sender_raw(RegisterMap* map) const; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; + -+#endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git 
a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp new file mode 100644 -index 00000000000..5ac1bf57f57 +index 0000000000..d19f5b859c --- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,248 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,117 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15159,236 +14456,106 @@ index 00000000000..5ac1bf57f57 + * + */ + -+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP -+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP -+ -+#include "code/codeCache.hpp" -+#include "code/vmreg.inline.hpp" -+ -+// Inline functions for RISCV frames: -+ -+// Constructors: ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" + -+inline frame::frame() { -+ _pc = NULL; -+ _sp = NULL; -+ _unextended_sp = NULL; -+ _fp = NULL; -+ _cb = NULL; -+ _deopt_state = unknown; -+} ++#define __ masm->masm()-> + -+static int spin; ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); + -+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; + } -+} + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { -+ init(ptr_sp, ptr_fp, pc); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); +} + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = unextended_sp; -+ _fp = ptr_fp; -+ _pc = pc; -+ assert(pc != NULL, "no pc?"); -+ _cb = CodeCache::find_blob(pc); -+ adjust_unextended_sp(); -+ -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = 
original_pc; -+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), -+ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; -+ } -+} ++#undef __ + -+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { -+ intptr_t a = intptr_t(ptr_sp); -+ intptr_t b = intptr_t(ptr_fp); -+ _sp = ptr_sp; -+ _unextended_sp = ptr_sp; -+ _fp = ptr_fp; -+ _pc = (address)(ptr_sp[-1]); ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif + -+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace -+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly -+ // unlucky the junk value could be to a zombied method and we'll die on the -+ // find_blob call. This is also why we can have no asserts on the validity -+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler -+ // -> pd_last_frame should use a specialized version of pd_last_frame which could -+ // call a specilaized frame constructor instead of this one. -+ // Then we could use the assert below. However this assert is of somewhat dubious -+ // value. ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); + -+ _cb = CodeCache::find_blob(_pc); -+ adjust_unextended_sp(); ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); + -+ address original_pc = CompiledMethod::get_deopt_original_pc(this); -+ if (original_pc != NULL) { -+ _pc = original_pc; -+ _deopt_state = is_deoptimized; -+ } else { -+ _deopt_state = not_deoptimized; ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; ++ } + } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); +} + -+// Accessors -+ -+inline bool frame::equal(frame other) const { -+ bool ret = sp() == other.sp() && -+ unextended_sp() == other.unextended_sp() && -+ fp() == other.fp() && -+ pc() == other.pc(); -+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); -+ return ret; -+} -+ -+// Return unique id for this frame. The id must have a value where we can distinguish -+// identity and younger/older relationship. NULL represents an invalid (incomparable) -+// frame. -+inline intptr_t* frame::id(void) const { return unextended_sp(); } -+ -+// Return true if the frame is older (less recent activation) than the frame represented by id -+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() > id ; } -+ -+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } -+ -+inline intptr_t* frame::link_or_null() const { -+ intptr_t** ptr = (intptr_t **)addr_at(link_offset); -+ return os::is_readable_pointer(ptr) ? 
*ptr : NULL; -+} -+ -+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } -+ -+// Return address -+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } -+inline address frame::sender_pc() const { return *sender_pc_addr(); } -+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } -+ -+inline intptr_t** frame::interpreter_frame_locals_addr() const { -+ return (intptr_t**)addr_at(interpreter_frame_locals_offset); -+} -+ -+inline intptr_t* frame::interpreter_frame_last_sp() const { -+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); -+} -+ -+inline intptr_t* frame::interpreter_frame_bcp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); -+} -+ -+inline intptr_t* frame::interpreter_frame_mdp_addr() const { -+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); -+} -+ -+ -+// Constant pool cache -+ -+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { -+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); -+} -+ -+// Method -+ -+inline Method** frame::interpreter_frame_method_addr() const { -+ return (Method**)addr_at(interpreter_frame_method_offset); -+} -+ -+// Mirror ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); + -+inline oop* frame::interpreter_frame_mirror_addr() const { -+ return (oop*)addr_at(interpreter_frame_mirror_offset); -+} ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); + -+// top of expression stack -+inline intptr_t* frame::interpreter_frame_tos_address() const { -+ intptr_t* last_sp = interpreter_frame_last_sp(); -+ if (last_sp == NULL) { -+ return sp(); -+ } else { -+ // sp() may have been extended or shrunk by an adapter. At least -+ // check that we don't fall behind the legal region. -+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
-+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); -+ return last_sp; ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); + } -+} -+ -+inline oop* frame::interpreter_frame_temp_oop_addr() const { -+ return (oop *)(fp() + interpreter_frame_oop_temp_offset); -+} -+ -+inline int frame::interpreter_frame_monitor_size() { -+ return BasicObjectLock::size(); -+} -+ -+ -+// expression stack -+// (the max_stack arguments are used by the GC; see class FrameClosure) -+ -+inline intptr_t* frame::interpreter_frame_expression_stack() const { -+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); -+ return monitor_end-1; -+} -+ -+ -+// Entry frames -+ -+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { -+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); -+} + ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); + -+// Compiled frames -+PRAGMA_DIAG_PUSH -+PRAGMA_NONNULL_IGNORED -+inline oop frame::saved_oop_result(RegisterMap* map) const { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ return (*result_adr); -+} ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } ++ } + -+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { -+ oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ guarantee(result_adr != NULL, "bad register save location"); -+ *result_adr = obj; ++ return result; +} -+PRAGMA_DIAG_POP -+ -+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp new file mode 100644 -index 00000000000..1c46b3947d3 +index 0000000000..d73ea36b24 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,484 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,715 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -15413,92 +14580,87 @@ index 00000000000..1c46b3947d3 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/g1/g1BarrierSet.hpp" -+#include "gc/g1/g1BarrierSetAssembler.hpp" -+#include "gc/g1/g1BarrierSetRuntime.hpp" -+#include "gc/g1/g1CardTable.hpp" -+#include "gc/g1/g1ThreadLocalData.hpp" -+#include "gc/g1/heapRegion.hpp" -+#include "gc/shared/collectedHeap.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#ifdef COMPILER1 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" -+#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" +#endif + +#define __ masm-> + -+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if (!dest_uninitialized) { -+ Label done; -+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(t0, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(t0, in_progress); -+ } -+ __ beqz(t0, done); ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { + -+ __ push_reg(saved_regs, sp); -+ if (count == c_rarg0) { -+ if (addr == c_rarg1) { -+ // exactly backwards!! -+ __ mv(t0, c_rarg0); -+ __ mv(c_rarg0, c_rarg1); -+ __ mv(c_rarg1, t0); ++ Label done; ++ ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); ++ ++ // Is GC active? 
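++      // (gc_state is a per-thread copy of the heap's gc-state bits kept in
++      // ShenandoahThreadLocalData, so a single byte load from xthread suffices.)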
++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); ++ ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); + } else { -+ __ mv(c_rarg1, count); -+ __ mv(c_rarg0, addr); ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); + } -+ } else { -+ __ mv(c_rarg0, addr); -+ __ mv(c_rarg1, count); -+ } -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); -+ } -+ __ pop_reg(saved_regs, sp); + -+ __ bind(done); ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); ++ } ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); ++ } + } +} + -+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ __ push_reg(saved_regs, sp); -+ assert_different_registers(start, count, tmp); -+ assert_different_registers(c_rarg0, count); -+ __ mv(c_rarg0, start); -+ __ mv(c_rarg1, count); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -+ __ pop_reg(saved_regs, sp); ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ } +} + -+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ -+ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; @@ -15507,15 +14669,15 @@ index 00000000000..1c46b3947d3 + assert_different_registers(obj, pre_val, tmp, t0); + assert(pre_val != noreg && tmp != noreg, "expecting a register"); + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? 
-+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); @@ -15531,15 +14693,13 @@ index 00000000000..1c46b3947d3 + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime + -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? -+ // If yes, goto runtime -+ -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp + __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr + + // Record the previous value + __ sd(pre_val, Address(tmp, 0)); @@ -15548,130 +14708,219 @@ index 00000000000..1c46b3947d3 + __ bind(runtime); + // save the live input values + RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } ++ if (tosca_live) saved += RegSet::of(x10); ++ if (obj != noreg) saved += RegSet::of(obj); + + __ push_reg(saved, sp); + ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. 
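++  // (In this file the expanded form is used by load_at()'s keep-alive barrier
++  // below, which calls this routine with expand_call == true.)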
+ if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop_reg(saved, sp); + + __ bind(done); ++} ++ ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); + ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); +} + -+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2) { -+ assert_cond(masm != NULL); -+ assert(thread == xthread, "must be"); -+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, -+ t0); -+ assert(store_addr != noreg && new_val != noreg && tmp != noreg && -+ tmp2 != noreg, "expecting a register"); ++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ RegSet saved_regs = RegSet::of(t2); ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ saved_regs += RegSet::of(tmp); ++ } + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(saved_regs, sp); + -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markOopDesc::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); ++ ++ __ pop_reg(saved_regs, sp); ++} ++ ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t0, t1); + + Label done; -+ Label runtime; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); + -+ // Does store cross heap regions? ++ // Check for heap stability ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + -+ __ xorr(tmp, store_addr, new_val); -+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); -+ __ beqz(tmp, done); ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; ++ } + -+ // crosses regions, storing NULL? ++ // Save x10 and x11, unless it is an output register ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); + -+ __ beqz(new_val, done); ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + -+ // storing region crossing non-NULL, is card already dirty? 
++ __ mv(result_dst, x10); ++ __ pop_reg(saved_regs, sp); + -+ ExternalAddress cardtable((address) ct->byte_map_base()); -+ const Register card_addr = tmp; ++ __ bind(done); ++ __ leave(); ++} + -+ __ srli(card_addr, store_addr, CardTable::card_shift()); ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); + -+ // get the address of the card -+ __ load_byte_map_base(tmp2); -+ __ add(card_addr, card_addr, tmp2); -+ __ lbu(tmp2, Address(card_addr)); -+ __ mv(t0, (int)G1CardTable::g1_young_card_val()); -+ __ beq(tmp2, t0, done); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ __ pop_call_clobbered_registers(); ++ } ++} + -+ __ membar(MacroAssembler::StoreLoad); ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} + -+ __ lbu(tmp2, Address(card_addr)); -+ __ beqz(tmp2, done); ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } + -+ // storing a region crossing, non-NULL oop, card is clean. -+ // dirty card and log. ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ Register result_dst = dst; + -+ __ sb(zr, Address(card_addr)); ++ // Preserve src location for LRB ++ RegSet saved_regs; ++ if (dst == src.base()) { ++ dst = (src.base() == x28) ? 
x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); ++ } ++ assert_different_registers(dst, src.base()); + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ __ ld(tmp2, buffer); -+ __ add(t0, tmp2, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ load_reference_barrier(masm, dst, src); + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(store_addr); -+ __ push_reg(saved, sp); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_reg(saved, sp); ++ if (dst != result_dst) { ++ __ mv(result_dst, dst); ++ dst = result_dst; ++ } + -+ __ bind(done); -+} ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } + -+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); -+ bool on_oop = is_reference_type(type); -+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; -+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; -+ bool on_reference = on_weak || on_phantom; -+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ if (on_oop && on_reference) { -+ // RA is live. It must be saved around calls. -+ __ enter(); // barrier may call runtime -+ // Generate the G1 pre-barrier code to log the value of -+ // the referent field in an SATB buffer. -+ g1_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); + __ leave(); + } +} + -+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ return; ++ } ++ + // flatten object address if needed + if (dst.offset() == 0) { + if (dst.base() != x13) { @@ -15681,17 +14930,18 @@ index 00000000000..1c46b3947d3 + __ la(x13, dst); + } + -+ g1_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); ++ shenandoah_write_barrier_pre(masm, ++ x13 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { ++ iu_barrier(masm, val, 
tmp1); + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { @@ -15699,23 +14949,121 @@ index 00000000000..1c46b3947d3 + __ mv(new_val, val); + } + BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ g1_write_barrier_post(masm, -+ x13 /* store_adr */, -+ new_val /* new_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ tmp2 /* tmp2 */); + } +} + -+#ifdef COMPILER1 ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); ++ ++ // Check for null. ++ __ beqz(obj, done); ++ ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); ++ ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. ++// ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. ++// ++// Clobbers t0, t1 ++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, ++ Register addr, ++ Register expected, ++ Register new_val, ++ Assembler::Aqrl acquire, ++ Assembler::Aqrl release, ++ bool is_cae, ++ Register result) { ++ bool is_narrow = UseCompressedOops; ++ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++ ++ assert_different_registers(addr, expected, t0, t1); ++ assert_different_registers(addr, new_val, t0, t1); ++ ++ Label retry, success, fail, done; ++ ++ __ bind(retry); ++ ++ // Step1: Try to CAS. ++ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ ++ // If success, then we are done. ++ __ beq(expected, t1, success); ++ ++ // Step2: CAS failed, check the forwared pointer. ++ __ mv(t0, t1); ++ ++ if (is_narrow) { ++ __ decode_heap_oop(t0, t0); ++ } ++ resolve_forward_pointer(masm, t0); ++ ++ __ encode_heap_oop(t0, t0); ++ ++ // Report failure when the forwarded oop was not expected. 
++ __ bne(t0, expected, fail); ++ ++ // Step 3: CAS again using the forwarded oop. ++ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); ++ ++ // Retry when failed. ++ __ bne(t0, t1, retry); ++ ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); ++ } else { ++ __ mv(result, 1); ++ } ++ __ j(done); ++ ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } ++ ++ __ bind(done); ++} + +#undef __ -+#define __ ce->masm()-> + -+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++#ifdef COMPILER1 + ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already @@ -15727,7 +15075,7 @@ index 00000000000..1c46b3947d3 + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -15735,46 +15083,79 @@ index 00000000000..1c46b3947d3 + __ j(*stub->continuation()); +} + -+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { -+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); -+ assert(stub->addr()->is_register(), "Precondition"); -+ assert(stub->new_val()->is_register(), "Precondition"); -+ Register new_val_reg = stub->new_val()->as_register(); -+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->addr()->as_pointer_register(), 0); -+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); -+} + -+#undef __ ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); + -+#define __ sasm-> ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); + -+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_pre_barrier", false); ++ if (res != obj) { ++ __ mv(res, obj); ++ } + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); + -+ // arg0 : previous value of memory -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ // Check for object in cset. 
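++  // (The cset table is a byte map with one entry per heap region, indexed by
++  // object address >> region_size_bytes_shift.)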
++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); + -+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); ++ ++ // Decode forwarded object. ++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); ++ ++ __ bind(slow_path); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); ++ ++ __ j(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width -+ __ lwu(tmp, in_progress); -+ } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); -+ } ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); + __ beqz(tmp, done); + + // Can we store original value in the thread's buffer? @@ -15792,93 +15173,110 @@ index 00000000000..1c46b3947d3 + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + -+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("g1_post_barrier", false); -+ -+ // arg0 : store_address -+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp -+ -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ -+ Label done; -+ Label runtime; ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved + -+ // At this point we know new_value is non-NULL and the new_value crosses regions. 
-+ // Must check to see if card is already dirty -+ const Register thread = xthread; ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); + -+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); -+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); + -+ const Register card_offset = t1; -+ // RA is free here, so we can use it to hold the byte_map_base. -+ const Register byte_map_base = ra; ++ __ epilogue(); ++} + -+ assert_different_registers(card_offset, byte_map_base, t0); ++#undef __ + -+ __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ load_byte_map_base(byte_map_base); ++#endif // COMPILER1 + -+ // Convert card offset into an address in card_addr -+ Register card_addr = card_offset; -+ __ add(card_addr, byte_map_base, card_addr); ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; ++} + -+ __ lbu(t0, Address(card_addr, 0)); -+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); -+ __ beqz(t0, done); ++#define __ cgen->assembler()-> + -+ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. ++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); + -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t0, Address(card_addr, 0)); -+ __ beqz(t0, done); ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); + -+ // storing region crossing non-NULL, card is clean. -+ // dirty card and log. 
-+ __ sb(zr, Address(card_addr, 0)); ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ __ ld(t0, queue_index); -+ __ beqz(t0, runtime); -+ __ sub(t0, t0, wordSize); -+ __ sd(t0, queue_index); ++ __ push_call_clobbered_registers(); + -+ // Reuse RA to hold buffer_addr -+ const Register buffer_addr = ra; ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); + -+ __ ld(buffer_addr, buffer); -+ __ add(t0, buffer_addr, t0); -+ __ sd(card_addr, Address(t0, 0)); -+ __ j(done); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); -+ __ epilogue(); ++ return start; +} + +#undef __ + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 00000000000..37bc183f39c +index 0000000000..5d75035e9d --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp -@@ -0,0 +1,78 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,97 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -15902,68 +15300,291 @@ index 00000000000..37bc183f39c + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+#include "utilities/macros.hpp" -+ ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; -+#endif ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; +class StubAssembler; -+class G1PreBarrierStub; -+class G1PostBarrierStub; ++#endif ++class StubCodeGenerator; + -+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs); -+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++private: + -+ void g1_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++ static address _shenandoah_lrb; + -+ void g1_write_barrier_post(MacroAssembler* masm, -+ Register store_addr, -+ Register new_val, -+ Register thread, -+ Register tmp, -+ Register tmp2); ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + +public: -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); -+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + ++ static address shenandoah_lrb(); ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); +#endif + -+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register 
count, RegSet saved_regs); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); +}; + -+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad new file mode 100644 -index 00000000000..8735fd014ff +index 0000000000..bab407a8b7 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,197 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} ++ ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 0000000000..d6ce8da07b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15986,20 +15607,34 @@ index 00000000000..8735fd014ff + * + */ + -+#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + -+const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++const int StackAlignmentInBytes = 16; + -+#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. ++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp new file mode 100644 -index 00000000000..3c115a2ea02 +index 0000000000..90db2f4460 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,302 @@ ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,111 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -16023,292 +15658,102 @@ index 00000000000..3c115a2ea02 + * + */ + -+#include "precompiled.hpp" -+#include "classfile/classLoaderData.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "memory/universe.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" -+ -+#define __ masm-> -+ -+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread) { -+ assert_cond(masm != NULL); -+ -+ // RA is live. It must be saved around calls. 
-+ -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ bool is_not_null = (decorators & IS_NOT_NULL) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ if (in_heap) { -+ if (UseCompressedOops) { -+ __ lwu(dst, src); -+ if (is_not_null) { -+ __ decode_heap_oop_not_null(dst); -+ } else { -+ __ decode_heap_oop(dst); -+ } -+ } else { -+ __ ld(dst, src); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ ld(dst, src); -+ } -+ break; -+ } -+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; -+ case T_BYTE: __ load_signed_byte (dst, src); break; -+ case T_CHAR: __ load_unsigned_short(dst, src); break; -+ case T_SHORT: __ load_signed_short (dst, src); break; -+ case T_INT: __ lw (dst, src); break; -+ case T_LONG: __ ld (dst, src); break; -+ case T_ADDRESS: __ ld (dst, src); break; -+ case T_FLOAT: __ flw (f10, src); break; -+ case T_DOUBLE: __ fld (f10, src); break; -+ default: Unimplemented(); -+ } -+} -+ -+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ assert_cond(masm != NULL); -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool in_native = (decorators & IN_NATIVE) != 0; -+ switch (type) { -+ case T_OBJECT: // fall through -+ case T_ARRAY: { -+ val = val == noreg ? zr : val; -+ if (in_heap) { -+ if (UseCompressedOops) { -+ assert(!dst.uses(val), "not enough registers"); -+ if (val != zr) { -+ __ encode_heap_oop(val); -+ } -+ __ sw(val, dst); -+ } else { -+ __ sd(val, dst); -+ } -+ } else { -+ assert(in_native, "why else?"); -+ __ sd(val, dst); -+ } -+ break; -+ } -+ case T_BOOLEAN: -+ __ andi(val, val, 0x1); // boolean is true if LSB is 1 -+ __ sb(val, dst); -+ break; -+ case T_BYTE: __ sb(val, dst); break; -+ case T_CHAR: __ sh(val, dst); break; -+ case T_SHORT: __ sh(val, dst); break; -+ case T_INT: __ sw(val, dst); break; -+ case T_LONG: __ sd(val, dst); break; -+ case T_ADDRESS: __ sd(val, dst); break; -+ case T_FLOAT: __ fsw(f10, dst); break; -+ case T_DOUBLE: __ fsd(f10, dst); break; -+ default: Unimplemented(); -+ } -+ -+} -+ -+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ assert_cond(masm != NULL); -+ // If mask changes we need to ensure that the inverse is still encodable as an immediate -+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); -+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); -+ __ ld(obj, Address(obj, 0)); // *obj -+} -+ -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
-+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp2); -+ assert_different_registers(obj, var_size_in_bytes); -+ Register end = tmp2; -+ -+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } -+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); -+ __ bgtu(end, t0, slow_case, is_far); -+ -+ // update the tlab top pointer -+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); -+ -+ // recover var_size_in_bytes if necessary -+ if (var_size_in_bytes == end) { -+ __ sub(var_size_in_bytes, var_size_in_bytes, obj); -+ } -+} -+ -+// Defines obj, preserves var_size_in_bytes -+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Label& slow_case, -+ bool is_far) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, var_size_in_bytes, tmp1); -+ if (!Universe::heap()->supports_inline_contig_alloc()) { -+ __ j(slow_case); -+ } else { -+ Register end = tmp1; -+ Label retry; -+ __ bind(retry); -+ -+ // Get the current end of the heap -+ ExternalAddress address_end((address) Universe::heap()->end_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t1, address_end, offset); -+ __ ld(t1, Address(t1, offset)); -+ } -+ -+ // Get the current top of the heap -+ ExternalAddress address_top((address) Universe::heap()->top_addr()); -+ { -+ int32_t offset; -+ __ la_patchable(t0, address_top, offset); -+ __ addi(t0, t0, offset); -+ __ lr_d(obj, t0, Assembler::aqrl); -+ } -+ -+ // Adjust it my the size of our new object -+ if (var_size_in_bytes == noreg) { -+ __ la(end, Address(obj, con_size_in_bytes)); -+ } else { -+ __ add(end, obj, var_size_in_bytes); -+ } -+ -+ // if end < obj then we wrapped around high memory -+ __ bltu(end, obj, slow_case, is_far); -+ -+ __ bgtu(end, t1, slow_case, is_far); -+ -+ // If heap_top hasn't been changed by some other thread, update it. -+ __ sc_d(t1, end, t0, Assembler::rl); -+ __ bnez(t1, retry); -+ -+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); -+ } -+} -+ -+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1) { -+ assert_cond(masm != NULL); -+ assert(tmp1->is_valid(), "need temp reg"); -+ -+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+ if (var_size_in_bytes->is_valid()) { -+ __ add(tmp1, tmp1, var_size_in_bytes); -+ } else { -+ __ add(tmp1, tmp1, con_size_in_bytes); -+ } -+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); -+} -+ -+void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ -+ if (bs_nm == NULL) { -+ return; -+ } ++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP ++#define CPU_RISCV_GLOBALS_RISCV_HPP + -+ // RISCV atomic operations require that the memory address be naturally aligned. 
-+ __ align(4); ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" + -+ Label skip, guard; -+ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ __ lwu(t0, guard); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + -+ // Subsequent loads of oops must occur after load of guard value. -+ // BarrierSetNMethod::disarm sets guard with release semantics. -+ __ membar(MacroAssembler::LoadLoad); -+ __ lwu(t1, thread_disarmed_addr); -+ __ beq(t0, t1, skip); ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -+ __ jalr(ra, t0, offset); -+ __ j(skip); ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 64); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + -+ __ bind(guard); ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) + -+ assert(__ offset() % 4 == 0, "bad alignment"); -+ __ emit_int32(0); // nmethod guard value. Skipped over in common case. ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) + -+ __ bind(skip); -+} ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + -+void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -+ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs == NULL) { -+ return; -+ } ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); + -+ Label bad_call; -+ __ beqz(xmethod, bad_call); ++define_pd_global(bool, UseMembar, true); + -+ // Pointer chase to the method holder to find out if the method is concurrently unloading. -+ Label method_live; -+ __ load_method_holder_cld(t0, xmethod); ++define_pd_global(bool, PreserveFramePointer, false); + -+ // Is it a strong CLD? -+ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -+ __ bnez(t1, method_live); ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + -+ // Is it a weak but alive CLD? -+ __ push_reg(RegSet::of(x28, x29), sp); ++define_pd_global(uintx, TypeProfileLevel, 111); + -+ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); ++define_pd_global(bool, CompactStrings, true); + -+ // Uses x28 & x29, so we must pass new temporaries. 
-+ __ resolve_weak_handle(x28, x29); -+ __ mv(t0, x28); ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+ __ pop_reg(RegSet::of(x28, x29), sp); ++define_pd_global(bool, ThreadLocalHandshakes, true); + -+ __ bnez(t0, method_live); ++define_pd_global(intx, InlineSmallCode, 1000); + -+ __ bind(bad_call); ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseZba, false, "Use Zba instructions") \ ++ experimental(bool, UseZbb, false, "Use Zbb instructions") \ ++ experimental(bool, UseZbs, false, "Use Zbs instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(method_live); -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp new file mode 100644 -index 00000000000..b85f7f5582b +index 0000000000..cc93103dc5 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -16331,69 +15776,69 @@ index 00000000000..b85f7f5582b + * + */ + -+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -+ ++#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "memory/allocation.hpp" -+#include "oops/access.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" + -+class BarrierSetAssembler: public CHeapObj { -+private: -+ void incr_allocated_bytes(MacroAssembler* masm, -+ Register var_size_in_bytes, int con_size_in_bytes, -+ Register t1 = noreg); ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size) ++ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; ++} + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) {} -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register end, Register tmp, RegSet saved_regs) {} -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++#define __ masm-> + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + -+ virtual void tlab_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++} + -+ void eden_allocate(MacroAssembler* masm, -+ Register obj, // result: pointer to object after successful allocation -+ Register 
var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); -+ virtual void barrier_stubs_init() {} ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} + -+ virtual void nmethod_entry_barrier(MacroAssembler* masm); -+ virtual void c2i_entry_barrier(MacroAssembler* masm); -+ virtual ~BarrierSetAssembler() {} -+}; + -+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; ++} +diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 00000000000..ae7ee4c5a44 +index 0000000000..d615dcfb9e --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -@@ -0,0 +1,171 @@ ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -16417,159 +15862,104 @@ index 00000000000..ae7ee4c5a44 + */ + +#include "precompiled.hpp" -+#include "code/codeCache.hpp" -+#include "code/nativeInst.hpp" -+#include "gc/shared/barrierSetNMethod.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/registerMap.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/align.hpp" -+#include "utilities/debug.hpp" -+ -+class NativeNMethodBarrier: public NativeInstruction { -+ address instruction_address() const { return addr_at(0); } -+ -+ int *guard_addr() { -+ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -+ return reinterpret_cast(instruction_address() + 12 * 4); -+ } -+ -+public: -+ int get_value() { -+ return Atomic::load_acquire(guard_addr()); -+ } ++#include "asm/macroAssembler.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/java.hpp" ++#include "runtime/icache.hpp" + -+ void set_value(int value) { -+ Atomic::release_store(guard_addr(), value); -+ } ++#define __ _masm-> + -+ void verify() const; -+}; ++static int icache_flush(address addr, int lines, int magic) { ++ // To make a store to instruction memory visible to all RISC-V harts, ++ // the writing hart has to execute a data FENCE before requesting that ++ // all remote RISC-V harts execute a FENCE.I. 
+ -+// Store the instruction bitmask, bits and name for checking the barrier. -+struct CheckInsn { -+ uint32_t mask; -+ uint32_t bits; -+ const char *name; -+}; ++ // We need to make sure stores happens before the I/D cache synchronization. ++ __asm__ volatile("fence rw, rw" : : : "memory"); + -+static const struct CheckInsn barrierInsn[] = { -+ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -+ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -+ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -+ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -+ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -+ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -+ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -+ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -+ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -+ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -+ { 0x00000fff, 0x0000006f, "j skip "} -+ /* guard: */ -+ /* 32bit nmethod guard value */ -+ /* skip: */ -+}; ++ RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); + -+// The encodings must match the instructions emitted by -+// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific -+// register numbers and immediate values in the encoding. -+void NativeNMethodBarrier::verify() const { -+ intptr_t addr = (intptr_t) instruction_address(); -+ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -+ uint32_t inst = *((uint32_t*) addr); -+ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -+ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -+ fatal("not an %s instruction.", barrierInsn[i].name); -+ } -+ addr += 4; -+ } ++ return magic; +} + -+ -+/* We're called from an nmethod when we need to deoptimize it. We do -+ this by throwing away the nmethod's frame and jumping to the -+ ic_miss stub. This looks like there has been an IC miss at the -+ entry of the nmethod, so we resolve the call, which will fall back -+ to the interpreter if the nmethod has been unloaded. */ -+void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { -+ -+ typedef struct { -+ intptr_t *sp; intptr_t *fp; address ra; address pc; -+ } frame_pointers_t; -+ -+ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); -+ -+ JavaThread *thread = JavaThread::current(); -+ RegisterMap reg_map(thread, false); -+ frame frame = thread->last_frame(); -+ -+ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -+ assert(frame.cb() == nm, "must be"); -+ frame = frame.sender(®_map); -+ -+ LogTarget(Trace, nmethod, barrier) out; -+ if (out.is_enabled()) { -+ ResourceMark mark; -+ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -+ nm->method()->name_and_sig_as_C_string(), -+ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -+ thread->name(), frame.sp(), nm->verified_entry_point()); ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ // Only riscv_flush_icache is supported as I-cache synchronization. ++ // We must make sure the VM can execute such without error. 
++ if (!RiscvFlushIcache::test()) { ++ vm_exit_during_initialization("Unable to synchronize I-cache"); + } + -+ new_frame->sp = frame.sp(); -+ new_frame->fp = frame.fp(); -+ new_frame->ra = frame.pc(); -+ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); -+} -+ -+// This is the offset of the entry barrier from where the frame is completed. -+// If any code changes between the end of the verified entry where the entry -+// barrier resides, and the completion of the frame, then -+// NativeNMethodCmpBarrier::verify() will immediately complain when it does -+// not find the expected native instruction at this offset, which needs updating. -+// Note that this offset is invariant of PreserveFramePointer. ++ address start = (address)icache_flush; ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; + -+// see BarrierSetAssembler::nmethod_entry_barrier -+// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 -+static const int entry_barrier_offset = -4 * 13; ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. ++ ICache::invalidate_range(start, 0); + -+static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -+ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -+ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -+ debug_only(barrier->verify()); -+ return barrier; ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); ++ } +} + -+void BarrierSetNMethod::disarm(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return; -+ } ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp +new file mode 100644 +index 0000000000..5bf40ca820 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP + -+ barrier->set_value(disarmed_value()); -+} ++// Interface for updating the instruction cache. 
Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. + -+bool BarrierSetNMethod::is_armed(nmethod* nm) { -+ if (!supports_entry_barrier(nm)) { -+ return false; -+ } ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; + -+ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -+ return barrier->get_value() != disarmed_value(); -+} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 00000000000..a419f92b5f6 +index 0000000000..fa5ddc34b2 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,111 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1931 @@ +/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -16596,1940 +15986,1917 @@ index 00000000000..a419f92b5f6 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/cardTableBarrierSetAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "interpreter/interp_masm.hpp" -+ -+#define __ masm-> -+ ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" + -+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { -+ assert_cond(masm != NULL); -+ assert_different_registers(obj, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); -+ -+ __ srli(obj, obj, CardTable::card_shift()); -+ -+ assert(CardTable::dirty_card_val() == 0, "must be"); ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); + -+ __ load_byte_map_base(tmp); -+ __ add(tmp, obj, tmp); ++ Label done, notBool, notByte, notChar; + -+ if (UseCondCardMark) { -+ Label L_already_dirty; -+ __ membar(MacroAssembler::StoreLoad); -+ __ lbu(t1, Address(tmp)); -+ __ beqz(t1, L_already_dirty); -+ __ sb(zr, Address(tmp)); -+ __ 
bind(L_already_dirty); -+ } else { -+ __ sb(zr, Address(tmp)); -+ } -+} ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); + -+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) { -+ assert_cond(masm != NULL); -+ assert_different_registers(start, tmp); -+ assert_different_registers(count, tmp); ++ // mask integer result to narrower return type. ++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); + -+ Label L_loop, L_done; -+ const Register end = count; ++ andi(result, result, 0x1); ++ j(done); + -+ __ beqz(count, L_done); // zero count - nothing to do -+ // end = start + count << LogBytesPerHeapOop -+ __ shadd(end, count, start, count, LogBytesPerHeapOop); -+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); + -+ __ srli(start, start, CardTable::card_shift()); -+ __ srli(end, end, CardTable::card_shift()); -+ __ sub(count, end, start); // number of bytes to copy ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); + -+ __ load_byte_map_base(tmp); -+ __ add(start, start, tmp); ++ bind(notChar); ++ sign_extend(result, result, 16); + -+ __ bind(L_loop); -+ __ add(tmp, start, count); -+ __ sb(zr, Address(tmp)); -+ __ sub(count, count, 1); -+ __ bgez(count, L_loop); -+ __ bind(L_done); ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); +} + -+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool in_heap = (decorators & IN_HEAP) != 0; -+ bool is_array = (decorators & IS_ARRAY) != 0; -+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; -+ bool precise = is_array || on_anonymous; ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); ++} + -+ bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); -+ if (needs_post_barrier) { -+ // flatten object address if needed -+ if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), x13); -+ } else { -+ assert_cond(masm != NULL); -+ __ la(x13, dst); -+ store_check(masm, x13, t0); -+ } ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. 
++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); + } +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..686fe8fa478 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" -+ -+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { -+protected: -+ void store_check(MacroAssembler* masm, Register obj, Register tmp); + -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; + -+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..7aa2015f9ec ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); ++ break; ++ case ltos: ++ ld(x10, val_addr); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/modRefBarrierSetAssembler.hpp" + -+#define __ masm-> ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit + -+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); + -+ if (is_oop) { -+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
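++    // The recorded earlyret TOS state is reloaded and passed as the single
++    // argument; the resulting entry address comes back in x10 and is jumped to.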
++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); + } +} + -+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, -+ RegSet saved_regs) { -+ if (is_oop) { -+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); -+ } ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); +} + -+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ if (is_reference_type(type)) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ ShouldNotReachHere(); + } +} -diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..00419c3163c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. ++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" + -+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other -+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected -+// accesses, which are overridden in the concrete BarrierSetAssembler. ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. 
unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); ++} + -+class ModRefBarrierSetAssembler: public BarrierSetAssembler { -+protected: -+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register addr, Register count, RegSet saved_regs) {} -+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, -+ Register start, Register count, Register tmp, RegSet saved_regs) {} ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} + -+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); + -+public: -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register start, Register count, Register tmp, RegSet saved_regs); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); -+}; ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} + -+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -new file mode 100644 -index 00000000000..cd568cc723f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 
-+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses ++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} + -+#include "precompiled.hpp" -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); + -+#define __ masm->masm()-> ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 + -+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { -+ Register addr = _addr->as_register_lo(); -+ Register newval = _new_value->as_register(); -+ Register cmpval = _cmp_value->as_register(); -+ Register tmp1 = _tmp1->as_register(); -+ Register tmp2 = _tmp2->as_register(); -+ Register result = result_opr()->as_register(); ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 + -+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); ++ // Profile the failure of the check. 
++ profile_typecheck_failed(x12); // blows x12 ++} + -+ if (UseCompressedOops) { -+ __ encode_heap_oop(tmp1, cmpval); -+ cmpval = tmp1; -+ __ encode_heap_oop(tmp2, newval); -+ newval = tmp2; -+ } ++// Java Expression Stack + -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, -+ /* release */ Assembler::rl, /* is_cae */ false, result); ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); +} + -+#undef __ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); ++} + -+#ifdef ASSERT -+#define __ gen->lir(__FILE__, __LINE__)-> -+#else -+#define __ gen->lir()-> -+#endif ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { -+ BasicType bt = access.type(); -+ if (access.is_oop()) { -+ LIRGenerator *gen = access.gen(); -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), -+ LIR_OprFact::illegalOpr /* pre_val */); -+ } -+ if (ShenandoahCASBarrier) { -+ cmp_value.load_item(); -+ new_value.load_item(); ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} + -+ LIR_Opr tmp1 = gen->new_register(T_OBJECT); -+ LIR_Opr tmp2 = gen->new_register(T_OBJECT); -+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); -+ LIR_Opr result = gen->new_register(T_INT); ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} + -+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); -+ return result; -+ } -+ } -+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); +} + -+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { -+ LIRGenerator* gen = access.gen(); -+ BasicType type = access.type(); ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ LIR_Opr result = gen->new_register(type); -+ value.load_item(); -+ LIR_Opr value_opr = value.result(); ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ if (access.is_oop()) { -+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); -+ } ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); ++} + -+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); -+ LIR_Opr tmp = gen->new_register(T_INT); -+ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); ++} + -+ if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ LIR_Opr tmp_opr = gen->new_register(type); -+ __ 
move(result, tmp_opr); -+ result = tmp_opr; -+ if (ShenandoahSATBBarrier) { -+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, -+ result /* pre_val */); -+ } ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ -+ return result; +} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..d0ac6e52436 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,712 @@ -+/* -+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeapRegion.hpp" -+#include "gc/shenandoah/shenandoahRuntime.hpp" -+#include "gc/shenandoah/shenandoahThreadLocalData.hpp" -+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" -+#endif -+ -+#define __ masm-> + -+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs) { -+ if (is_oop) { -+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ Label done; ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Avoid calling runtime if count == 0 -+ __ beqz(count, done); ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} + -+ // Is GC active? 
-+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ assert_different_registers(src, dst, count, t0); ++void InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); ++} + -+ __ lbu(t0, gc_state); -+ if (ShenandoahSATBBarrier && dest_uninitialized) { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, done); -+ } else { -+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); -+ __ beqz(t0, done); -+ } ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); ++} + -+ __ push_reg(saved_regs, sp); -+ if (UseCompressedOops) { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), -+ src, dst, count); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); -+ } -+ __ pop_reg(saved_regs, sp); -+ __ bind(done); -+ } -+ } ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); +} + -+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ if (ShenandoahSATBBarrier) { -+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(t0, run_compiled_code); ++ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ jr(t0); ++ bind(run_compiled_code); + } ++ ++ ld(t0, Address(method, Method::from_interpreted_offset())); ++ jr(t0); +} + -+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call) { -+ // If expand_call is true then we expand the call_VM_leaf macro -+ // directly to skip generating the check by -+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ assert(thread == xthread, "must be"); ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. amd64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++} + -+ Label done; -+ Label runtime; ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} + -+ assert_different_registers(obj, pre_val, tmp, t0); -+ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll, ++ Register Rs) { ++ // Pay attention to the argument Rs, which is acquiesce in t0. 
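++  // Rs carries the bytecode being dispatched. Below it indexes an 8-byte-entry
++  // dispatch table (hence the shift-add by 3) and the handler address loaded
++  // from that slot is jumped to; if a thread-local safepoint poll is pending,
++  // the safepoint table is used instead.
++  // In effect: target = ((address*)table)[Rs]; jump to target.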
++ if (VerifyActivationFrameSize) { ++ Unimplemented(); ++ } ++ if (verifyoop && state == atos) { ++ verify_oop(x10); ++ } + -+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); -+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + -+ // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ __ lwu(tmp, in_progress); ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); ++ bnez(t1, safepoint); ++ } ++ if (table == Interpreter::dispatch_table(state)) { ++ mv(t1, Interpreter::distance_from_dispatch_table(state)); ++ add(t1, Rs, t1); ++ shadd(t1, t1, xdispatch, t1, 3); + } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ __ lbu(tmp, in_progress); ++ mv(t1, (address)table); ++ shadd(t1, Rs, t1, Rs, 3); + } -+ __ beqz(tmp, done); ++ ld(t1, Address(t1)); ++ jr(t1); + -+ // Do we need to load the previous value? -+ if (obj != noreg) { -+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ la(t1, ExternalAddress((address)safepoint_table)); ++ shadd(t1, Rs, t1, Rs, 3); ++ ld(t1, Address(t1)); ++ jr(t1); + } ++} + -+ // Is the previous value null? -+ __ beqz(pre_val, done); ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); ++} + -+ // Can we store original value in the thread's buffer? -+ // Is index == 0? -+ // (The index field is typed as size_t.) -+ __ ld(tmp, index); // tmp := *index_adr -+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), Rs); ++} + -+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize -+ __ sd(tmp, index); // *index_adr := tmp -+ __ ld(t0, buffer); -+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { ++ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++} + -+ // Record the previous value -+ __ sd(pre_val, Address(tmp, 0)); -+ __ j(done); ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode ++ load_unsigned_byte(t0, Address(xbcp, step)); ++ add(xbcp, xbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} + -+ __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) saved += RegSet::of(x10); -+ if (obj != noreg) saved += RegSet::of(obj); ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(t0, Address(xbcp, 0)); ++ dispatch_base(state, table); ++} + -+ __ push_reg(saved, sp); ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. 
++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; + -+ // Calling the runtime using the regular call_VM_leaf mechanism generates -+ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) -+ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. -+ // -+ // If we care generating the pre-barrier without a frame (e.g. in the -+ // intrinsified Reference.get() routine) then ebp might be pointing to -+ // the caller frame and so this check will most likely fail at runtime. -+ // -+ // Expanding the call directly bypasses the generation of the check. -+ // So when we do not have have a full interpreter frame on the stack -+ // expand_call should be passed true. -+ if (expand_call) { -+ assert(pre_val != c_rarg1, "smashed arg"); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } else { -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ } ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag + -+ __ pop_reg(saved, sp); ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); + -+ __ bind(done); -+} ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); + -+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // unlock monitor ++ push(state); // save result + -+ Label is_null; -+ __ beqz(dst, is_null); -+ resolve_forward_pointer_not_null(masm, dst, tmp); -+ __ bind(is_null); -+} ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor + -+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely -+// passed in. 
-+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { -+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); -+ // The below loads the mark word, checks if the lowest two bits are -+ // set, and if so, clear the lowest two bits and copy the result -+ // to dst. Otherwise it leaves dst alone. -+ // Implementing this is surprisingly awkward. I do it here by: -+ // - Inverting the mark word -+ // - Test lowest two bits == 0 -+ // - If so, set the lowest two bits -+ // - Invert the result back, and copy to dst -+ RegSet saved_regs = RegSet::of(t2); -+ bool borrow_reg = (tmp == noreg); -+ if (borrow_reg) { -+ // No free registers available. Make one useful. -+ tmp = t0; -+ if (tmp == dst) { -+ tmp = t1; ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); ++ ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); + } -+ saved_regs += RegSet::of(tmp); ++ j(unlocked); + } + -+ assert_different_registers(tmp, dst, t2); -+ __ push_reg(saved_regs, sp); ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); + -+ Label done; -+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ bnez(t2, done); -+ __ ori(tmp, tmp, markWord::marked_value); -+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ bind(done); ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); + -+ __ pop_reg(saved_regs, sp); -+} ++ // x10: Might contain return value + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -+ Register dst, -+ Address load_addr, -+ DecoratorSet decorators) { -+ assert(ShenandoahLoadRefBarrier, "Should be enabled"); -+ assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t0, t1); ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); + -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ bool is_narrow = UseCompressedOops && !is_native; ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // 
monitor block + -+ Label heap_stable, not_cset; -+ __ enter(); -+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lbu(t1, gc_state); ++ j(entry); + -+ // Check for heap stability -+ if (is_strong) { -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, heap_stable); -+ } else { -+ Label lrb; -+ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -+ __ bnez(t0, lrb); -+ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t0, heap_stable); -+ __ bind(lrb); -+ } ++ // Entry already locked, need to throw exception ++ bind(exception); + -+ // use x11 for load address -+ Register result_dst = dst; -+ if (dst == x11) { -+ __ mv(t1, dst); -+ dst = t1; -+ } ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); + -+ // Save x10 and x11, unless it is an output register -+ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(saved_regs, sp); -+ __ la(x11, load_addr); -+ __ mv(x10, dst); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. + -+ // Test for in-cset -+ if (is_strong) { -+ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1)); -+ __ andi(t0, t1, 1); -+ __ beqz(t0, not_cset); -+ } ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); + -+ __ push_call_clobbered_registers(); -+ if (is_strong) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } else if (is_weak) { -+ if (is_narrow) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } ++ ++ j(restart); + } ++ ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); ++ ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ + } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(!is_narrow, "phantom access cannot be narrow"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ __ bind(not_cset); -+ __ mv(result_dst, x10); -+ __ pop_reg(saved_regs, sp); + -+ __ bind(heap_stable); -+ __ leave(); -+} ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to 
be re-enabled ++ Label no_reserved_zone_enabling; + -+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { -+ if (ShenandoahIUBarrier) { -+ __ push_call_clobbered_registers(); ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); + -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); + -+ __ pop_call_clobbered_registers(); ++ bind(no_reserved_zone_enabling); + } ++ ++ // restore sender esp ++ mv(esp, t1); ++ ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. ++ andi(sp, esp, -16); +} + ++// Lock object +// -+// Arguments: -+// -+// Inputs: -+// src: oop location to load from, might be clobbered -+// -+// Output: -+// dst: oop loaded from src location -+// -+// Kill: -+// x30 (tmp reg) -+// -+// Alias: -+// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking +// -+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ // 1: non-reference load, no additional barrier is needed -+ if (!is_reference_type(type)) { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; -+ } ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; + -+ // 2: load a reference from src location and apply LRB if needed -+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ Register result_dst = dst; ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+ // Preserve src location for LRB -+ RegSet saved_regs; -+ if (dst == src.base()) { -+ dst = (src.base() == x28) ? 
x29 : x28; -+ saved_regs = RegSet::of(dst); -+ __ push_reg(saved_regs, sp); -+ } -+ assert_different_registers(dst, src.base()); ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); + -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ Label slow_case; + -+ load_reference_barrier(masm, dst, src, decorators); ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); + -+ if (dst != result_dst) { -+ __ mv(result_dst, dst); -+ dst = result_dst; ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } + -+ if (saved_regs.bits() != 0) { -+ __ pop_reg(saved_regs, sp); -+ } -+ } else { -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ } ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); + -+ // 3: apply keep-alive barrier if needed -+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { -+ __ enter(); -+ __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm /* masm */, -+ noreg /* obj */, -+ dst /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ true /* tosca_live */, -+ true /* expand_call */); -+ __ pop_call_clobbered_registers(); -+ __ leave(); -+ } -+} ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2) { -+ bool on_oop = is_reference_type(type); -+ if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); -+ return; -+ } ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); + -+ // flatten object address if needed -+ if (dst.offset() == 0) { -+ if (dst.base() != x13) { -+ __ mv(x13, dst.base()); ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + } -+ } else { -+ __ la(x13, dst); -+ } -+ -+ shenandoah_write_barrier_pre(masm, -+ x13 /* obj */, -+ tmp2 /* pre_val */, -+ xthread /* thread */, -+ tmp1 /* tmp */, -+ val != noreg /* tosca_live */, -+ false /* expand_call */); + -+ if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); -+ } else { -+ iu_barrier(masm, val, tmp1); -+ // G1 barrier needs uncompressed oop for region cross check. 
-+ Register new_val = val; -+ if (UseCompressedOops) { -+ new_val = t1; -+ __ mv(new_val, val); -+ } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); -+ } -+} ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. ++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ mv(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); + -+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath) { -+ Label done; -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); + -+ // Check for null. -+ __ beqz(obj, done); ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } ++ beqz(swap_reg, done); + -+ assert(obj != t1, "need t1"); -+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); -+ __ lbu(t1, gc_state); ++ bind(slow_case); + -+ // Check for heap in evacuation phase -+ __ andi(t0, t1, ShenandoahHeap::EVACUATION); -+ __ bnez(t0, slowpath); ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); + -+ __ bind(done); ++ bind(done); ++ } +} + -+// Special Shenandoah CAS implementation that handles false negatives due -+// to concurrent evacuation. The service is more complex than a -+// traditional CAS operation because the CAS operation is intended to -+// succeed if the reference at addr exactly matches expected or if the -+// reference at addr holds a pointer to a from-space object that has -+// been relocated to the location named by expected. There are two -+// races that must be addressed: -+// a) A parallel thread may mutate the contents of addr so that it points -+// to a different object. In this case, the CAS operation should fail. -+// b) A parallel thread may heal the contents of addr, replacing a -+// from-space pointer held in addr with the to-space pointer -+// representing the new location of the object. -+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL -+// or it refers to an object that is not being evacuated out of -+// from-space, or it refers to the to-space version of an object that -+// is being evacuated out of from-space. ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. +// -+// By default the value held in the result register following execution -+// of the generated code sequence is 0 to indicate failure of CAS, -+// non-zero to indicate success. If is_cae, the result is the value most -+// recently fetched from addr rather than a boolean success indicator. 
++// Args: ++// c_rarg1: BasicObjectLock for lock +// -+// Clobbers t0, t1 -+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, -+ Register addr, -+ Register expected, -+ Register new_val, -+ Assembler::Aqrl acquire, -+ Assembler::Aqrl release, -+ bool is_cae, -+ Register result) { -+ bool is_narrow = UseCompressedOops; -+ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64; ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + -+ assert_different_registers(addr, expected, t0, t1); -+ assert_different_registers(addr, new_val, t0, t1); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ } else { ++ Label done; + -+ Label retry, success, fail, done; ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop + -+ __ bind(retry); ++ save_bcp(); // Save in case of exception + -+ // Step1: Try to CAS. -+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1); ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + -+ // If success, then we are done. -+ __ beq(expected, t1, success); ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ // Step2: CAS failed, check the forwared pointer. -+ __ mv(t0, t1); ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ if (is_narrow) { -+ __ decode_heap_oop(t0, t0); -+ } -+ resolve_forward_pointer(masm, t0); ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } + -+ __ encode_heap_oop(t0, t0); ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); + -+ // Report failure when the forwarded oop was not expected. -+ __ bne(t0, expected, fail); ++ // Test for recursion ++ beqz(header_reg, done); + -+ // Step 3: CAS again using the forwarded oop. -+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0); ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); + -+ // Retry when failed. -+ __ bne(t0, t1, retry); ++ // Call the runtime routine for slow case. 
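++    // The obj field was zeroed above ("Free entry"), so put the oop back into
++    // the BasicObjectLock before handing it to InterpreterRuntime::monitorexit.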
++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + -+ __ bind(success); -+ if (is_cae) { -+ __ mv(result, expected); -+ } else { -+ __ addi(result, zr, 1); -+ } -+ __ j(done); ++ bind(done); + -+ __ bind(fail); -+ if (is_cae) { -+ __ mv(result, t0); -+ } else { -+ __ mv(result, zr); ++ restore_bcp(); + } -+ -+ __ bind(done); +} + -+#undef __ + -+#ifdef COMPILER1 ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); ++} + -+#define __ ce->masm()-> ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 + -+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ // At this point we know that marking is in progress. -+ // If do_load() is true then we have to emit the -+ // load of the previous value; otherwise it has already -+ // been loaded into _pre_val. -+ __ bind(*stub->entry()); ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(RegSet::of(x10, x11), sp); ++} + -+ assert(stub->pre_val()->is_register(), "Precondition."); ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); + -+ Register pre_val_reg = stub->pre_val()->as_register(); ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
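++  // Rebuild the expected bcp from the mdp's bci (method->const_offset plus
++  // ConstMethod::codes_offset plus bci) and compare it with xbcp; on mismatch
++  // fall through to the InterpreterRuntime::verify_mdp call below.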
++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT ++} + -+ if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ } -+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); -+ ce->store_parameter(stub->pre_val()->as_register(), 0); -+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); -+ __ j(*stub->continuation()); ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); +} + -+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, -+ ShenandoahLoadReferenceBarrierStub* stub) { -+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); -+ __ bind(*stub->entry()); + -+ DecoratorSet decorators = stub->decorators(); -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); ++} + -+ Register obj = stub->obj()->as_register(); -+ Register res = stub->result()->as_register(); -+ Register addr = stub->addr()->as_pointer_register(); -+ Register tmp1 = stub->tmp1()->as_register(); -+ Register tmp2 = stub->tmp2()->as_register(); ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow + -+ assert(res == x10, "result must arrive in x10"); -+ assert_different_registers(tmp1, tmp2, t0); ++ assert_different_registers(t1, t0, mdp_in, reg); + -+ if (res != obj) { -+ __ mv(res, obj); ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; + } + -+ if (is_strong) { -+ // Check for object in cset. 
-+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(tmp2, tmp2, tmp1); -+ __ lbu(tmp2, Address(tmp2)); -+ __ beqz(tmp2, *stub->continuation(), true /* is_far */); ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); + } ++} + -+ ce->store_parameter(res, 0); -+ ce->store_parameter(addr, 1); ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); ++} + -+ if (is_strong) { -+ if (is_native) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -+ } else { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -+ } -+ } else if (is_weak) { -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); + } else { -+ assert(is_phantom, "only remaining strength"); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); + } ++} + -+ __ j(*stub->continuation()); ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + -+#undef __ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+#define __ sasm-> + -+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { -+ __ prologue("shenandoah_pre_barrier", false); ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, (unsigned)constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} + -+ // arg0 : previous value of memory + -+ BarrierSet* bs = BarrierSet::barrier_set(); ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling 
interpreter"); + -+ const Register pre_val = x10; -+ const Register thread = xthread; -+ const Register tmp = t0; ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++} + -+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); -+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ Label done; -+ Label runtime; ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); + -+ // Is marking still active? -+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); -+ __ lb(tmp, gc_state); -+ __ andi(tmp, tmp, ShenandoahHeap::MARKING); -+ __ beqz(tmp, done); ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; ++ blez(bumped_count, L); // skip store if counter overflow, ++ sd(bumped_count, data); ++ bind(L); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} + -+ // Can we store original value in the thread's buffer? -+ __ ld(tmp, queue_index); -+ __ beqz(tmp, runtime); ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ sub(tmp, tmp, wordSize); -+ __ sd(tmp, queue_index); -+ __ ld(t1, buffer); -+ __ add(tmp, tmp, t1); -+ __ load_parameter(0, t1); -+ __ sd(t1, Address(tmp, 0)); -+ __ j(done); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ __ bind(runtime); -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, pre_val); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); -+ __ pop_call_clobbered_registers(); -+ __ bind(done); ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + -+ __ epilogue(); ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } +} + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) { -+ __ prologue("shenandoah_load_reference_barrier", false); -+ // arg0 : object to be resolved ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ __ push_call_clobbered_registers(); -+ __ load_parameter(0, x10); -+ __ load_parameter(1, x11); ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); + -+ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -+ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -+ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -+ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -+ if (is_strong) { -+ if (is_native) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } else { -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -+ } -+ } -+ } else if (is_weak) { -+ assert(!is_native, "weak must not be called off-heap"); -+ if (UseCompressedOops) { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -+ } else { -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -+ } -+ } else { -+ assert(is_phantom, "only remaining strength"); -+ assert(is_native, "phantom must only be called off-heap"); -+ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); -+ } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ __ epilogue(); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } +} + -+#undef __ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -new file mode 100644 -index 00000000000..a705f497667 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); + -+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#ifdef COMPILER1 -+class LIR_Assembler; -+class ShenandoahPreBarrierStub; -+class ShenandoahLoadReferenceBarrierStub; -+class StubAssembler; -+#endif -+class StubCodeGenerator; ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} + -+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+private: + -+ void satb_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); -+ void shenandoah_write_barrier_pre(MacroAssembler* masm, -+ Register obj, -+ Register pre_val, -+ Register thread, -+ Register tmp, -+ bool tosca_live, -+ bool expand_call); ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+public: ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ // We are making a call. Increment the count for null receiver. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ j(skip_receiver_profile); ++ bind(not_null); ++ } + -+ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); + -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+#endif ++ // The method data pointer needs to be updated to reflect the new target. 
+ -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} + -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2); ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } + -+ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+}; ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} + -+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -new file mode 100644 -index 00000000000..6c855f23c2a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,285 @@ -+// -+// Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). 
-+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); + -+source_hpp %{ -+#include "gc/shenandoah/shenandoahBarrierSet.hpp" -+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -+%} ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) + -+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); + -+ effect(TEMP tmp, KILL cr); ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); + -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" -+ %} ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. 
++ bind(found_null); ++ } ++ } + -+ ins_pipe(pipe_slow); -+%} ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); ++ } ++} + -+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: + -+ effect(TEMP tmp, KILL cr); ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; + -+ format %{ -+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" -+ %} ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ bind(done); ++} + -+ ins_pipe(pipe_slow); -+%} ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ effect(TEMP tmp, KILL cr); ++ // Update the total ret count. 
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + -+ format %{ -+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" -+ %} ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ -+ effect(TEMP tmp, KILL cr); -+ -+ format %{ -+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" -+ %} ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + -+ ins_pipe(pipe_slow); -+%} ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } + -+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ update_mdp_for_ret(return_bci); + -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" -+ %} ++ bind(profile_continue); ++ } ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ ins_pipe(pipe_slow); -+%} ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); + -+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" -+ %} ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ bind(profile_continue); ++ } ++} + -+ ins_pipe(pipe_slow); -+%} ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; + -+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); + -+ ins_pipe(pipe_slow); -+%} ++ bind (profile_continue); ++ } ++} + -+instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -+ %} ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + -+ ins_pipe(pipe_slow); -+%} ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); + -+instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ bind(profile_continue); ++ } ++} + -+ effect(TEMP_DEF res, TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -+ %} ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ true /* is_cae */, $res$$Register); -+ %} ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); + -+ ins_pipe(pipe_slow); -+%} ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); + -+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" -+ %} ++ bind(profile_continue); ++ } ++} + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; + -+ ins_pipe(pipe_slow); -+%} ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); + -+instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); + -+ ins_pipe(pipe_slow); -+%} ++ bind(profile_continue); ++ } ++} + -+instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_reserved(n)); -+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -+ ins_cost(10 * DEFAULT_COST); ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + -+ effect(TEMP tmp, KILL cr); -+ format %{ -+ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -+ %} ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } + -+ ins_encode %{ -+ Register tmp = $tmp$$Register; -+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
-+ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, -+ false /* is_cae */, $res$$Register); -+ %} ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ } + -+ ins_pipe(pipe_slow); -+%} -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -new file mode 100644 -index 00000000000..3d3f4d4d774 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,441 @@ -+/* -+ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/codeBlob.hpp" -+#include "code/vmreg.inline.hpp" -+#include "gc/z/zBarrier.inline.hpp" -+#include "gc/z/zBarrierSet.hpp" -+#include "gc/z/zBarrierSetAssembler.hpp" -+#include "gc/z/zBarrierSetRuntime.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+#include "memory/resourceArea.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#include "c1/c1_MacroAssembler.hpp" -+#include "gc/z/c1/zBarrierSetC1.hpp" -+#endif // COMPILER1 -+#ifdef COMPILER2 -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#endif // COMPILER2 + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. 
++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. + -+#undef __ -+#define __ masm-> ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } + -+void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread) { -+ if (!ZBarrierSet::barrier_needed(decorators, type)) { -+ // Barrier not needed -+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -+ return; ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); + } ++} + -+ assert_different_registers(t1, src.base()); -+ assert_different_registers(t0, t1, dst); + ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { + Label done; -+ -+ // Load bad mask into temp register. -+ __ la(t0, src); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ ld(dst, Address(t0)); -+ -+ // Test reference against bad mask. If mask bad, then we need to fix it up. -+ __ andr(t1, dst, t1); -+ __ beqz(t1, done); -+ -+ __ enter(); -+ -+ __ push_call_clobbered_registers_except(RegSet::of(dst)); -+ -+ if (c_rarg0 != dst) { -+ __ mv(c_rarg0, dst); ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); + } ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} + -+ __ mv(c_rarg1, t0); -+ -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -+ -+ // Make sure dst has the return value. -+ if (dst != x10) { -+ __ mv(dst, x10); ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. 
++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); + } -+ -+ __ pop_call_clobbered_registers_except(RegSet::of(dst)); -+ __ leave(); -+ -+ __ bind(done); ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); +} + ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ save_bcp(); +#ifdef ASSERT -+ -+void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2) { -+ // Verify value -+ if (is_reference_type(type)) { -+ // Note that src could be noreg, which means we -+ // are storing null and can skip verification. -+ if (val != noreg) { -+ Label done; -+ -+ // tmp1 and tmp2 are often set to noreg. -+ RegSet savedRegs = RegSet::of(t0); -+ __ push_reg(savedRegs, sp); -+ -+ __ ld(t0, address_bad_mask_from_thread(xthread)); -+ __ andr(t0, val, t0); -+ __ beqz(t0, done); -+ __ stop("Verify oop store failed"); -+ __ should_not_reach_here(); -+ __ bind(done); -+ __ pop_reg(savedRegs, sp); -+ } ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); + } -+ -+ // Store value -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); +} + -+#endif // ASSERT -+ -+void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs) { -+ if (!is_oop) { -+ // Barrier not needed -+ return; -+ } ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; + -+ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ verify_oop(obj); + -+ assert_different_registers(src, count, t0); ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); + -+ __ push_reg(saved_regs, sp); ++ bind(update); ++ load_klass(obj, obj); + -+ if (count == c_rarg0 && src == c_rarg1) { -+ // exactly backwards!! -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ __ xorr(c_rarg1, c_rarg0, c_rarg1); -+ __ xorr(c_rarg0, c_rarg0, c_rarg1); -+ } else { -+ __ mv(c_rarg0, src); -+ __ mv(c_rarg1, count); -+ } ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. 
+ -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. + -+ __ pop_reg(saved_regs, sp); ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ mv(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); -+} ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); + -+void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath) { -+ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ bind(none); ++ // first time here. Set profile type. ++ sd(obj, mdo_addr); + -+ assert_different_registers(jni_env, robj, tmp); ++ bind(next); ++} + -+ // Resolve jobject -+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } + -+ // Compute the offset of address bad mask from the field of jni_environment -+ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -+ in_bytes(JavaThread::jni_environment_offset())); ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; + -+ // Load the address bad mask -+ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ test_method_data_pointer(mdp, profile_continue); + -+ // Check address bad mask -+ __ andr(tmp, robj, tmp); -+ __ bnez(tmp, slowpath); ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + -+ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); -+} ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ mv(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } + -+#ifdef COMPILER2 ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; + -+OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -+ if (!OptoReg::is_reg(opto_reg)) { -+ return OptoReg::Bad; -+ } ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; + -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_FloatRegister()) { -+ return opto_reg & ~1; -+ } ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); + -+ return opto_reg; -+} ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); + -+#undef __ -+#define __ _masm-> ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ mv(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ mv(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); + -+class ZSaveLiveRegisters { -+private: -+ MacroAssembler* const _masm; -+ RegSet _gp_regs; -+ FloatRegSet _fp_regs; -+ VectorRegSet _vp_regs; ++ bind(profileArgument); + -+public: -+ void initialize(ZLoadBarrierStubC2* stub) { -+ // Record registers that needs to be saved/restored -+ RegMaskIterator rmi(stub->live()); -+ while (rmi.has_next()) { -+ const OptoReg::Name opto_reg = rmi.next(); -+ if (OptoReg::is_reg(opto_reg)) { -+ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -+ if (vm_reg->is_Register()) { -+ _gp_regs += RegSet::of(vm_reg->as_Register()); -+ } else if (vm_reg->is_FloatRegister()) { -+ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -+ } else if (vm_reg->is_VectorRegister()) { -+ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -+ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -+ } else { -+ fatal("Unknown register type"); -+ } -+ } -+ } ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n 
- o - 1 from ++ // the end of the argument list ++ mv(t0, stack_slot_offset0); ++ mv(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); + -+ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -+ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -+ } ++ mv(t0, argument_type_offset0); ++ mv(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); + -+ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _gp_regs(), -+ _fp_regs(), -+ _vp_regs() { -+ // Figure out what registers to save/restore -+ initialize(stub); ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); + -+ // Save registers -+ __ push_reg(_gp_regs, sp); -+ __ push_fp(_fp_regs, sp); -+ __ push_vp(_vp_regs, sp); -+ } ++ // increment index by 1 ++ addi(index, index, 1); ++ mv(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); + -+ ~ZSaveLiveRegisters() { -+ // Restore registers -+ __ pop_vp(_vp_regs, sp); -+ __ pop_fp(_fp_regs, sp); -+ __ pop_reg(_gp_regs, sp); -+ } -+}; ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } + -+class ZSetupArguments { -+private: -+ MacroAssembler* const _masm; -+ const Register _ref; -+ const Address _ref_addr; ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); + -+public: -+ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -+ _masm(masm), -+ _ref(stub->ref()), -+ _ref_addr(stub->ref_addr()) { -+ -+ // Setup arguments -+ if (_ref_addr.base() == noreg) { -+ // No self healing -+ if (_ref != c_rarg0) { -+ __ mv(c_rarg0, _ref); ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); + } -+ __ mv(c_rarg1, zr); ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + } else { -+ // Self healing -+ if (_ref == c_rarg0) { -+ // _ref is already at correct place -+ __ la(c_rarg1, _ref_addr); -+ } else if (_ref != c_rarg1) { -+ // _ref is in wrong place, but not in c_rarg1, so fix it first -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, _ref); -+ } else if (_ref_addr.base() != c_rarg0) { -+ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -+ __ mv(c_rarg0, _ref); -+ __ la(c_rarg1, _ref_addr); -+ } else { -+ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -+ if (_ref_addr.base() == c_rarg0) { -+ __ mv(t1, c_rarg1); -+ __ la(c_rarg1, _ref_addr); -+ __ mv(c_rarg0, t1); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } -+ } -+ -+ ~ZSetupArguments() { -+ // Transfer result -+ if (_ref != x10) { -+ __ mv(_ref, x10); ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } -+ } -+}; -+ -+#undef __ -+#define __ masm-> + -+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -+ BLOCK_COMMENT("ZLoadBarrierStubC2"); -+ -+ // Stub entry -+ __ bind(*stub->entry()); ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one + -+ { -+ ZSaveLiveRegisters save_live_registers(masm, stub); -+ ZSetupArguments setup_arguments(masm, stub); -+ int32_t offset = 0; -+ __ la_patchable(t0, stub->slow_path(), offset); -+ __ jalr(x1, t0, offset); ++ bind(profile_continue); + } -+ -+ // Stub exit -+ __ j(*stub->continuation()); +} + -+#undef __ -+ -+#endif // COMPILER2 -+ -+#ifdef COMPILER1 -+#undef __ -+#define __ ce->masm()-> -+ -+void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const { -+ assert_different_registers(xthread, ref->as_register(), t1); -+ __ ld(t1, address_bad_mask_from_thread(xthread)); -+ __ andr(t1, t1, ref->as_register()); -+} ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; + -+void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const { -+ // Stub entry -+ __ bind(*stub->entry()); ++ test_method_data_pointer(mdp, profile_continue); + -+ Register ref = stub->ref()->as_register(); -+ Register ref_addr = noreg; -+ Register tmp = noreg; ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+ if (stub->tmp()->is_valid()) { -+ // Load address into tmp register -+ ce->leal(stub->ref_addr(), stub->tmp()); -+ ref_addr = tmp = stub->tmp()->as_pointer_register(); -+ } else { -+ // Address already in register -+ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); -+ } ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. 
We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ mv(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ mv(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ mv(t1, vmIntrinsics::_compiledLambdaForm); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } + -+ assert_different_registers(ref, ref_addr, noreg); ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); + -+ // Save x10 unless it is the result or tmp register -+ // Set up SP to accomodate parameters and maybe x10. -+ if (ref != x10 && tmp != x10) { -+ __ sub(sp, sp, 32); -+ __ sd(x10, Address(sp, 16)); -+ } else { -+ __ sub(sp, sp, 16); ++ bind(profile_continue); + } ++} + -+ // Setup arguments and call runtime stub -+ ce->store_parameter(ref_addr, 1); -+ ce->store_parameter(ref, 0); -+ -+ __ far_call(stub->runtime_stub()); ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; + -+ // Verify result -+ __ verify_oop(x10, "Bad oop"); ++ test_method_data_pointer(mdp, profile_continue); + ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set + -+ // Move result into place -+ if (ref != x10) { -+ __ mv(ref, x10); -+ } ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); + -+ // Restore x10 unless it is the result or tmp register -+ if (ref != x10 && tmp != x10) { -+ __ ld(x10, Address(sp, 16)); -+ __ add(sp, sp, 32); -+ } else { -+ __ add(sp, sp, 16); -+ } ++ Label loop; ++ bind(loop); + -+ // Stub exit -+ __ j(*stub->continuation()); -+} ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); + -+#undef __ -+#define __ sasm-> ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); + -+void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const { -+ __ prologue("zgc_load_barrier stub", false); ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); + -+ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); + -+ // Setup arguments -+ __ load_parameter(0, c_rarg0); -+ __ load_parameter(1, c_rarg1); ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); + -+ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ bind(profile_continue); ++ } ++} + -+ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} + -+ __ epilogue(); ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); +} + -+#undef __ -+#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); ++} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 00000000000..dc07ab635fe +index 0000000000..4126e8ee70 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,101 @@ ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,283 @@ +/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -18553,91 +17920,273 @@ index 00000000000..dc07ab635fe + * + */ + -+#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP + -+#include "code/vmreg.hpp" -+#include "oops/accessDecorators.hpp" -+#ifdef COMPILER2 -+#include "opto/optoreg.hpp" -+#endif // COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" + -+#ifdef COMPILER1 -+class LIR_Assembler; -+class LIR_Opr; -+class StubAssembler; -+class ZLoadBarrierStubC1; -+#endif // COMPILER1 ++// This file specializes the assember with interpreter-specific macros + -+#ifdef COMPILER2 -+class Node; -+class ZLoadBarrierStubC2; -+#endif // COMPILER2 ++typedef ByteSize (*OffsetFunction)(uint); + -+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -+public: -+ virtual void load_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Register dst, -+ Address src, -+ Register tmp1, -+ Register tmp_thread); ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; + -+#ifdef ASSERT -+ virtual void store_at(MacroAssembler* masm, -+ DecoratorSet decorators, -+ BasicType type, -+ Address dst, -+ Register val, -+ Register tmp1, -+ Register tmp2); -+#endif // ASSERT ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); + -+ virtual void arraycopy_prologue(MacroAssembler* masm, -+ DecoratorSet decorators, -+ bool is_oop, -+ Register src, -+ Register dst, -+ Register count, -+ RegSet saved_regs); ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); + -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -+ Register jni_env, -+ Register robj, -+ Register tmp, -+ Label& slowpath); ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); + -+#ifdef COMPILER1 -+ void generate_c1_load_barrier_test(LIR_Assembler* ce, -+ LIR_Opr ref) const; ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} + -+ void generate_c1_load_barrier_stub(LIR_Assembler* ce, -+ ZLoadBarrierStubC1* stub) const; ++ void load_earlyret_value(TosState state); + -+ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -+ DecoratorSet decorators) const; -+#endif // COMPILER1 ++ void jump_to_entry(address entry); + -+#ifdef COMPILER2 -+ OptoReg::Name refine_register(const Node* node, -+ OptoReg::Name opto_reg); ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); + -+ void generate_c2_load_barrier_stub(MacroAssembler* masm, -+ ZLoadBarrierStubC2* stub) const; -+#endif // COMPILER2 ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void 
restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } ++ ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } ++ ++ void get_dispatch(); ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } ++ ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // Load cpool->resolved_references(index). ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); ++ ++ // Load cpool->resolved_klass_at(index). ++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++ ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ } ++ ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. 
++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // FIXME: Give us a valid frame at a null check. ++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); ++ ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); 
++ ++ // narrow int return value ++ void narrow(Register result); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); ++ } ++ ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = true); ++ void verify_frame_setup(); ++#endif +}; + -+#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp new file mode 100644 -index 00000000000..d14997790af +index 0000000000..b5e6b8c512 --- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -@@ -0,0 +1,212 @@ ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,305 @@ +/* -+ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18661,483 +18210,294 @@ index 00000000000..d14997790af + */ + +#include "precompiled.hpp" -+#include "gc/shared/gcLogPrecious.hpp" -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/zGlobals.hpp" -+#include "runtime/globals.hpp" -+#include "runtime/os.hpp" -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/powerOfTwo.hpp" -+ -+#ifdef LINUX -+#include -+#endif // LINUX -+ -+// -+// The heap can have three different layouts, depending on the max heap size. -+// -+// Address Space & Pointer Layout 1 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . 
-+// +--------------------------------+ 0x0000014000000000 (20TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000010000000000 (16TB) -+// . . -+// +--------------------------------+ 0x00000c0000000000 (12TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000040000000000 (4TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 6 5 2 1 0 -+// +--------------------+----+-----------------------------------------------+ -+// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| -+// +--------------------+----+-----------------------------------------------+ -+// | | | -+// | | * 41-0 Object Offset (42-bits, 4TB address space) -+// | | -+// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) -+// | 0010 = Marked1 (Address view 8-12TB) -+// | 0100 = Remapped (Address view 16-20TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-46 Fixed (18-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 2 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000280000000000 (40TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// . . -+// +--------------------------------+ 0x0000180000000000 (24TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000080000000000 (8TB) -+// . . -+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 7 6 3 2 0 -+// +------------------+-----+------------------------------------------------+ -+// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| -+// +-------------------+----+------------------------------------------------+ -+// | | | -+// | | * 42-0 Object Offset (43-bits, 8TB address space) -+// | | -+// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) -+// | 0010 = Marked1 (Address view 16-24TB) -+// | 0100 = Remapped (Address view 32-40TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-47 Fixed (17-bits, always zero) -+// -+// -+// Address Space & Pointer Layout 3 -+// -------------------------------- -+// -+// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) -+// . . -+// . . -+// . . -+// +--------------------------------+ 0x0000500000000000 (80TB) -+// | Remapped View | -+// +--------------------------------+ 0x0000400000000000 (64TB) -+// . . -+// +--------------------------------+ 0x0000300000000000 (48TB) -+// | Marked1 View | -+// +--------------------------------+ 0x0000200000000000 (32TB) -+// | Marked0 View | -+// +--------------------------------+ 0x0000100000000000 (16TB) -+// . . 
-+// +--------------------------------+ 0x0000000000000000 -+// -+// 6 4 4 4 4 -+// 3 8 7 4 3 0 -+// +------------------+----+-------------------------------------------------+ -+// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| -+// +------------------+----+-------------------------------------------------+ -+// | | | -+// | | * 43-0 Object Offset (44-bits, 16TB address space) -+// | | -+// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) -+// | 0010 = Marked1 (Address view 32-48TB) -+// | 0100 = Remapped (Address view 64-80TB) -+// | 1000 = Finalizable (Address view N/A) -+// | -+// * 63-48 Fixed (16-bits, always zero) -+// ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" + -+// Default value if probing is not implemented for a certain platform: 128TB -+static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; -+// Minimum value returned, if probing fails: 64GB -+static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -+ -+static size_t probe_valid_max_address_bit() { -+#ifdef LINUX -+ size_t max_address_bit = 0; -+ const size_t page_size = os::vm_page_size(); -+ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -+ const uintptr_t base_addr = ((uintptr_t) 1U) << i; -+ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -+ // msync suceeded, the address is valid, and maybe even already mapped. -+ max_address_bit = i; -+ break; -+ } -+ if (errno != ENOMEM) { -+ // Some error occured. This should never happen, but msync -+ // has some undefined behavior, hence ignore this bit. -+#ifdef ASSERT -+ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#else // ASSERT -+ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); -+#endif // ASSERT -+ continue; -+ } -+ // Since msync failed with ENOMEM, the page might not be mapped. -+ // Try to map it, to see if the address is valid. 
-+ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ munmap(result_addr, page_size); -+ } -+ if ((uintptr_t) result_addr == base_addr) { -+ // address is valid -+ max_address_bit = i; -+ break; -+ } -+ } -+ if (max_address_bit == 0) { -+ // probing failed, allocate a very high page and take that bit as the maximum -+ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -+ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -+ if (result_addr != MAP_FAILED) { -+ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -+ munmap(result_addr, page_size); -+ } ++#define __ _masm-> ++ ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; + } -+ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -+ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); -+#else // LINUX -+ return DEFAULT_MAX_ADDRESS_BIT; -+#endif // LINUX ++ return noreg; +} + -+size_t ZPlatformAddressOffsetBits() { -+ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -+ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -+ const size_t min_address_offset_bits = max_address_offset_bits - 2; -+ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -+ const size_t address_offset_bits = log2i_exact(address_offset); -+ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; ++ } +} + -+size_t ZPlatformAddressMetadataShift() { -+ return ZPlatformAddressOffsetBits(); ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; +} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -new file mode 100644 -index 00000000000..f20ecd9b073 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -+#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP + -+const size_t ZPlatformGranuleSizeShift = 21; // 2MB -+const size_t ZPlatformHeapViews = 3; -+const size_t ZPlatformCacheLineSize = 64; -+ -+size_t ZPlatformAddressOffsetBits(); -+size_t ZPlatformAddressMetadataShift(); ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} + -+#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -new file mode 100644 -index 00000000000..6b6f87814a5 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -@@ -0,0 +1,233 @@ -+// -+// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// The C ABI specifies: ++// "integer scalars narrower than XLEN bits are widened according to the sign ++// of their type up to 32 bits, then sign-extended to XLEN bits." ++// Applies for both passed in register and stack. +// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. ++// Java uses 32-bit stack slots; jint, jshort, jchar, jbyte uses one slot. ++// Native uses 64-bit stack slots for all integer scalar types. +// ++// lw loads the Java stack slot, sign-extends and ++// sd store this widened integer into a 64 bit native stack slot. 
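As an illustrative aside (not part of this patch), the widening rule quoted above can be modelled in a few lines of plain C++. This is only a sketch of what the generated lw/sd pair achieves for an int argument; the helper name widen_int_arg and the slot type below are hypothetical.

#include <cstdint>

// Model of the RV64 C-ABI widening for a 32-bit Java stack slot:
// 'lw' sign-extends the 32-bit value, 'sd' then stores all 64 bits.
int64_t widen_int_arg(const int32_t* java_slot /* 32-bit interpreter slot */) {
  int32_t v = *java_slot;          // what 'lw' loads (sign-extending on RV64)
  return static_cast<int64_t>(v);  // what 'sd' writes into the 64-bit native slot
}

// For example, a slot holding -1 (0xFFFFFFFF) widens to 0xFFFFFFFFFFFFFFFF,
// while 1 (0x00000001) widens to 0x0000000000000001.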
++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+source_hpp %{ -+ -+#include "gc/shared/gc_globals.hpp" -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+ -+%} ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+source %{ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -+ if (barrier_data == ZLoadBarrierElided) { -+ return; ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); + } -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -+ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ bnez(tmp, *stub->entry(), true /* far */); -+ __ bind(*stub->continuation()); +} + -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -+ __ j(*stub->entry()); -+ __ bind(*stub->continuation()); -+} ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + -+%} ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } ++} + -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -+ effect(TEMP dst); ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + -+ ins_cost(4 * DEFAULT_COST); ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } ++} + -+ format %{ "ld $dst, $mem, #@zLoadP" %} ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); ++ } else { ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} + -+ ins_encode %{ -+ const Address ref_addr (as_Register($mem$$base), $mem$$disp); -+ __ ld($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -+ %} ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); + -+ ins_pipe(iload_reg_mem); -+%} ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); + -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); ++ __ flush(); ++} + -+ ins_cost(2 * VOLATILE_REF_COST); + -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -+ "mv $res, $res == $oldval" %} ++// Implementation of SignatureHandlerLibrary + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); -+ } -+ %} ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} + -+ ins_pipe(pipe_slow); -+%} + -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; + -+ ins_cost(2 * VOLATILE_REF_COST); ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -+ "mv $res, $res == $oldval" %} ++ intptr_t* double_slot_addr() { ++ 
intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } + -+ ins_encode %{ -+ Label failed; -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ beqz($res$$Register, failed); -+ __ mv(t0, $oldval$$Register); -+ __ bind(failed); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -+ __ andr(t1, t1, t0); -+ __ beqz(t1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result_as_bool */); -+ __ bind(good); ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} ++ return -1; ++ } + -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ bind(good); ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; + } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res); ++ return -1; ++ } + -+ ins_cost(2 * VOLATILE_REF_COST); ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } + -+ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, 
$res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(t0, t0, $res$$Register); -+ __ beqz(t0, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -+ __ bind(good); ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); + } -+ %} ++ } + -+ ins_pipe(pipe_slow); -+%} ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); -+ effect(TEMP_DEF prev, KILL cr); ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } + -+ ins_cost(2 * VOLATILE_REF_COST); ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } + -+ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; + -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 
1 : 0); ++ _num_reg_fp_args = 0; ++ } + -+ ins_pipe(pipe_serial); -+%} ++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; + -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -+ effect(TEMP_DEF prev, KILL cr); + -+ ins_cost(VOLATILE_REF_COST); ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate(UCONST64(-1)); + -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -+ %} -+ ins_pipe(pipe_serial); -+%} -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 00000000000..2936837d951 +index 0000000000..05df63ba2a --- /dev/null -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,52 @@ ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -19160,40 +18520,56 @@ index 00000000000..2936837d951 + * + */ + -+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP + -+const int StackAlignmentInBytes = 16; ++// This is included in the middle of class Interpreter. ++// Do not include files here. + -+// Indicates whether the C calling conventions require that -+// 32-bit integer argument values are extended to 64 bits. -+const bool CCallingConventionRequiresIntsAsLongs = false; ++// native method calls + -+// RISCV has adopted a multicopy atomic model closely following -+// that of ARMv8. -+#define CPU_MULTI_COPY_ATOMIC ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; + -+// To be safe, we deoptimize when we come across an access that needs -+// patching. This is similar to what is done on aarch64. 
-+#define DEOPTIMIZE_WHEN_PATCHING ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); + -+#define SUPPORTS_NATIVE_CX8 ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); + -+#define SUPPORT_RESERVED_STACK_AREA ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; ++ } + -+#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ // Code generation ++ void generate(uint64_t fingerprint); + -+#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; + -+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 00000000000..cbfc0583883 +index 0000000000..5a0c9b812f --- /dev/null -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,99 @@ ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,89 @@ +/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -19217,90 +18593,80 @@ index 00000000000..cbfc0583883 + * + */ + -+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP -+#define CPU_RISCV_GLOBALS_RISCV_HPP ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP + -+#include "utilities/globalDefinitions.hpp" -+#include "utilities/macros.hpp" ++private: + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks -+define_pd_global(bool, TrapBasedNullChecks, false); -+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) + -+define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(intx, CodeEntryAlignment, 64); -+define_pd_global(intx, OptoLoopAlignment, 16); ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } + -+#define DEFAULT_STACK_YELLOW_PAGES (2) -+#define DEFAULT_STACK_RED_PAGES (1) -+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the -+// stack if compiled for unix and LP64. To pass stack overflow tests we need -+// 20 shadow pages. 
-+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) -+#define DEFAULT_STACK_RESERVED_PAGES (1) ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ } ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } + -+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES -+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES -+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -+#define MIN_STACK_RESERVED_PAGES (0) ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); + -+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); -+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); -+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); -+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + -+define_pd_global(bool, RewriteBytecodes, true); -+define_pd_global(bool, RewriteFrequentPairs, true); ++ const address last_Java_pc(void) { return _last_Java_pc; } + -+define_pd_global(bool, PreserveFramePointer, false); ++private: + -+define_pd_global(uintx, TypeProfileLevel, 111); ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + -+define_pd_global(bool, CompactStrings, true); ++public: + -+// Clear short arrays bigger than one word in an arch-specific way -+define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + -+define_pd_global(intx, InlineSmallCode, 1000); ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + -+#define ARCH_FLAGS(develop, \ -+ product, \ -+ notproduct, \ -+ range, \ -+ constraint) \ -+ \ -+ product(bool, NearCpool, true, \ -+ "constant pool is close to instructions") \ -+ product(intx, BlockZeroingLowLimit, 256, \ -+ "Minimum size in bytes when block zeroing will be used") \ -+ range(1, max_jint) \ -+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ -+ /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ -+ product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence;" \ -+ "Extend fence.i to fence.i + fence.") \ -+ product(bool, AvoidUnalignedAccesses, true, \ -+ "Avoid generating unaligned memory accesses") \ -+ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -+ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -+ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -+ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -+ "Use RVV instructions for left/right shift of BigInteger") ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } + -+#endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 00000000000..cc93103dc55 +index 0000000000..f6e7351c4f --- /dev/null -+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp -@@ -0,0 +1,79 @@ ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,194 @@ +/* -+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -19325,123 +18691,181 @@ index 00000000000..cc93103dc55 + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/icBuffer.hpp" -+#include "gc/shared/collectedHeap.inline.hpp" -+#include "interpreter/bytecodes.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+ -+int InlineCacheBuffer::ic_stub_code_size() { -+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size) -+ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; -+} ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "runtime/safepoint.hpp" + +#define __ masm-> + -+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { -+ assert_cond(code_begin != NULL && entry_point != NULL); ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. 
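As an illustrative aside (not part of this patch), the fast path generated below follows a counter-validated, seqlock-style read: load the safepoint counter, bail out if it is odd, load the field, then re-check the counter. The sketch below only shows that general pattern under assumed names (fast_read, counter, field); it uses std::atomic and an explicit acquire fence where the stub instead relies on the address dependency described above.

#include <atomic>
#include <cstdint>
#include <optional>

// Generic counter-validated read: returns nullopt when the caller should
// fall back to the slow path.
std::optional<int32_t> fast_read(const std::atomic<uint32_t>& counter,
                                 const std::atomic<int32_t>& field) {
  uint32_t before = counter.load(std::memory_order_acquire);
  if (before & 1) {
    return std::nullopt;                               // odd counter: writer (safepoint) active
  }
  int32_t value = field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire); // keep the re-read ordered after the load
  if (counter.load(std::memory_order_relaxed) != before) {
    return std::nullopt;                               // counter moved: value may be stale
  }
  return value;                                        // consistent snapshot
}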
++ ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id ++ ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable ++ } + ResourceMark rm; -+ CodeBuffer code(code_begin, ic_stub_code_size()); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ // Note: even though the code contains an embedded value, we do not need reloc info -+ // because -+ // (1) the value is old (i.e., doesn't matter for scavenges) -+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); + -+ address start = __ pc(); -+ Label l; -+ __ ld(t1, l); -+ __ far_jump(ExternalAddress(entry_point)); -+ __ align(wordSize); -+ __ bind(l); -+ __ emit_int64((intptr_t)cached_value); -+ // Only need to invalidate the 1st two instructions - not the whole ic stub -+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); -+ assert(__ pc() - start == ic_stub_code_size(), "must be"); -+} ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); + -+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { -+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object -+ NativeJump* jump = nativeJump_at(move->next_instruction_address()); -+ return jump->jump_destination(); -+} ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); + -+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { -+ // The word containing the cached value is at the end of this IC buffer -+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); -+ void* o = (void*)*p; -+ return o; -+} -diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp -new file mode 100644 -index 00000000000..922a80f9f3e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ __ srli(roffset, c_rarg2, 2); // offset + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "runtime/icache.hpp" ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); + -+#define __ _masm-> ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; ++ } ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } + -+static int icache_flush(address addr, int lines, int magic) { -+ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); -+ return magic; -+} ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); + -+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { -+ address start = (address)icache_flush; -+ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); + -+ // ICache::invalidate_range() contains explicit condition that the first -+ // call is invoked on the generated icache flush stub code range. 
-+ ICache::invalidate_range(start, 0); ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } + + { -+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); + __ ret(); + } ++ __ flush(); ++ ++ return fast_entry; +} + -+#undef __ -diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 00000000000..5bf40ca8204 +index 0000000000..df3c0267ee --- /dev/null -+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -19464,31 +18888,95 @@ index 00000000000..5bf40ca8204 + * + */ + -+#ifndef CPU_RISCV_ICACHE_RISCV_HPP -+#define CPU_RISCV_ICACHE_RISCV_HPP ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP + -+// Interface for updating the instruction cache. Whenever the VM -+// modifies code, part of the processor instruction cache potentially -+// has to be flushed. 
++#include "jni.h" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). + -+class ICache : public AbstractICache { +public: -+ enum { -+ stub_size = 16, // Size of the icache flush stub in bytes -+ line_size = BytesPerWord, // conservative -+ log2_line_size = LogBytesPerWord // log2(line_size) -+ }; ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. 
++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET +}; + -+#endif // CPU_RISCV_ICACHE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 00000000000..d12dcb2af19 +index 0000000000..14e07036ac --- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1940 @@ ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,5390 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -19514,7041 +19002,6855 @@ index 00000000000..d12dcb2af19 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" -+#include "interp_masm_riscv.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "logging/log.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/markWord.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/basicLock.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/safepointMechanism.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/powerOfTwo.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER2 ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#endif + -+void InterpreterMacroAssembler::narrow(Register result) { -+ // Get method->_constMethod->_result_type -+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(t0, Address(t0, Method::const_offset())); -+ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + -+ Label done, notBool, notByte, notChar; ++static void 
pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ masm->mv(c_rarg0, arg); ++ } ++} + -+ // common case first -+ mv(t1, T_INT); -+ beq(t0, t1, done); ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ masm->mv(c_rarg1, arg); ++ } ++} + -+ // mask integer result to narrower return type. -+ mv(t1, T_BOOLEAN); -+ bne(t0, t1, notBool); ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ masm->mv(c_rarg2, arg); ++ } ++} + -+ andi(result, result, 0x1); -+ j(done); ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ masm->mv(c_rarg3, arg); ++ } ++} + -+ bind(notBool); -+ mv(t1, T_BYTE); -+ bne(t0, t1, notByte); -+ sign_extend(result, result, 8); -+ j(done); ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++} + -+ bind(notByte); -+ mv(t1, T_CHAR); -+ bne(t0, t1, notChar); -+ zero_extend(result, result, 16); -+ j(done); ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} + -+ bind(notChar); -+ sign_extend(result, result, 16); ++// Implementation of call_VM versions + -+ // Nothing to do for T_INT -+ bind(done); -+ addw(result, result, zr); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + -+void InterpreterMacroAssembler::jump_to_entry(address entry) { -+ assert(entry != NULL, "Entry must have been generated by now"); -+ j(entry); ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + -+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { -+ if (JvmtiExport::can_pop_frame()) { -+ Label L; -+ // Initiate popframe handling only if it is not already being -+ // processed. If the flag has the popframe_processing bit set, -+ // it means that this code is called *during* popframe handling - we -+ // don't want to reenter. -+ // This method is only called just after the call into the vm in -+ // call_VM_base, so the arg registers are available. -+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); -+ andi(t0, t1, JavaThread::popframe_pending_bit); -+ beqz(t0, L); -+ andi(t0, t1, JavaThread::popframe_processing_bit); -+ bnez(t0, L); -+ // Call Interpreter::remove_activation_preserving_args_entry() to get the -+ // address of the same-named entrypoint in the generated interpreter code. 
-+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); -+ jr(x10); -+ bind(L); -+ } ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); + -+void InterpreterMacroAssembler::load_earlyret_value(TosState state) { -+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); -+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); -+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); -+ switch (state) { -+ case atos: -+ ld(x10, oop_addr); -+ sd(zr, oop_addr); -+ verify_oop(x10); -+ break; -+ case ltos: -+ ld(x10, val_addr); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ lwu(x10, val_addr); -+ break; -+ case ftos: -+ flw(f10, val_addr); -+ break; -+ case dtos: -+ fld(f10, val_addr); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ // Clean up tos value in the thread object -+ mvw(t0, (int) ilgl); -+ sw(t0, tos_addr); -+ sw(zr, val_addr); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} + -+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { -+ if (JvmtiExport::can_force_early_return()) { -+ Label L; -+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} + -+ // Initiate earlyret handling only if it is not already being processed. -+ // If the flag has the earlyret_processing bit set, it means that this code -+ // is called *during* earlyret handling - we don't want to reenter. -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); -+ mv(t1, JvmtiThreadState::earlyret_pending); -+ bne(t0, t1, L); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { + -+ // Call Interpreter::remove_activation_early_entry() to get the address of the -+ // same-named entrypoint in the generated interpreter code. 
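The pass_argN helpers and the "smashed arg" asserts in the call_VM variants above guard a simple invariant: while shuffling several incoming values into the fixed argument registers, a register must not be overwritten if it still holds a value that has not been moved yet, which is why arguments are placed starting from the highest-numbered c_rarg. A toy standalone C++ model of that ordering rule (ToyRegs, pass_arg and shuffle_args are invented names, for illustration only):

#include <cassert>

// Toy register file: indices 0..3 stand in for c_rarg0..c_rarg3.
struct ToyRegs {
  long r[4] = {0, 0, 0, 0};
};

// Mirror of pass_argN: move src into argument slot dst, as a no-op when
// the value is already in place.
static void pass_arg(ToyRegs& regs, int dst, int src) {
  if (dst != src) {
    regs.r[dst] = regs.r[src];
  }
}

// Shuffle three argument values (currently sitting in registers a1..a3)
// into slots 1..3. Each step first checks that no still-pending source
// lives in the slot about to be written -- the "smashed arg" condition.
void shuffle_args(ToyRegs& regs, int a1, int a2, int a3) {
  assert(a1 != 3 && a2 != 3 && "smashed arg");
  pass_arg(regs, 3, a3);
  assert(a1 != 2 && "smashed arg");
  pass_arg(regs, 2, a2);
  pass_arg(regs, 1, a1);
}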
-+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); -+ jr(x10); -+ bind(L); -+ } ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + -+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { -+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); -+ lhu(reg, Address(xbcp, bcp_offset)); -+ revb_h(reg, reg); ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + -+void InterpreterMacroAssembler::get_dispatch() { -+ int32_t offset = 0; -+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); -+ addi(xdispatch, xdispatch, offset); -+} ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + -+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); -+ if (index_size == sizeof(u2)) { -+ load_unsigned_short(index, Address(xbcp, bcp_offset)); -+ } else if (index_size == sizeof(u4)) { -+ lwu(index, Address(xbcp, bcp_offset)); -+ // Check if the secondary index definition is still ~x, otherwise -+ // we have to change the following assembler code to calculate the -+ // plain index. -+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); -+ xori(index, index, -1); -+ addw(index, index, zr); -+ } else if (index_size == sizeof(u1)) { -+ load_unsigned_byte(index, Address(xbcp, bcp_offset)); -+ } else { -+ ShouldNotReachHere(); -+ } ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); +} + -+// Return -+// Rindex: index into constant pool -+// Rcache: address of cache entry - ConstantPoolCache::base_offset() -+// -+// A caller must add ConstantPoolCache::base_offset() to Rcache to get -+// the true address of the cache entry. 
++// Calls to C land +// -+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, -+ Register index, -+ int bcp_offset, -+ size_t index_size) { -+ assert_different_registers(cache, index); -+ assert_different_registers(cache, xcpool); -+ get_cache_index_at_bcp(index, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry -+ // riscv already has the cache in xcpool so there is no need to -+ // install it in cache. Instead we pre-add the indexed offset to -+ // xcpool and return it in cache. All clients of this method need to -+ // be modified accordingly. -+ shadd(cache, index, xcpool, cache, 5); -+} ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { + ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } + -+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, -+ Register index, -+ Register bytecode, -+ int byte_no, -+ int bcp_offset, -+ size_t index_size) { -+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); -+ // We use a 32-bit load here since the layout of 64-bit words on -+ // little-endian machines allow us that. -+ // n.b. unlike x86 cache already includes the index offset -+ la(bytecode, Address(cache, -+ ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::indices_offset())); -+ membar(MacroAssembler::AnyAny); -+ lwu(bytecode, bytecode); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ const int shift_count = (1 + byte_no) * BitsPerByte; -+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); -+ srli(bytecode, bytecode, XLEN - BitsPerByte); -+} ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, -+ Register tmp, -+ int bcp_offset, -+ size_t index_size) { -+ assert(cache != tmp, "must use different register"); -+ get_cache_index_at_bcp(tmp, bcp_offset, index_size); -+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); -+ // Convert from field index to ConstantPoolCacheEntry index -+ // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, -+ "else change next line"); -+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ // skip past the header -+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); -+ // construct pointer to cache entry -+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } +} + -+// Load object from cpool->resolved_references(index) -+void InterpreterMacroAssembler::load_resolved_reference_at_index( -+ Register result, Register index, Register tmp) { -+ 
assert_different_registers(result, index); ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); + -+ get_constant_pool(result); -+ // Load pointer for resolved_references[] objArray -+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); -+ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); -+ resolve_oop_handle(result, tmp); -+ // Add in the index -+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ shadd(result, index, result, index, LogBytesPerHeapOop); -+ load_heap_oop(result, Address(result, 0)); -+} ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + -+void InterpreterMacroAssembler::load_resolved_klass_at_offset( -+ Register cpool, Register index, Register klass, Register temp) { -+ shadd(temp, index, cpool, temp, LogBytesPerWord); -+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index -+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses -+ shadd(klass, temp, klass, temp, LogBytesPerWord); -+ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); +} + -+void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -+ Register method, -+ Register cache) { -+ const int method_offset = in_bytes( -+ ConstantPoolCache::base_offset() + -+ ((byte_no == TemplateTable::f2_byte) -+ ? ConstantPoolCacheEntry::f2_offset() -+ : ConstantPoolCacheEntry::f1_offset())); -+ -+ ld(method, Address(cache, method_offset)); // get f1 Method* ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } +} + -+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a -+// subtype of super_klass. ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. +// -+// Args: -+// x10: superklass -+// Rsub_klass: subklass ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. +// -+// Kills: -+// x12, x15 -+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, -+ Label& ok_is_subtype) { -+ assert(Rsub_klass != x10, "x10 holds superklass"); -+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); -+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
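The comment just above is the key ordering argument for safepoint_poll_acquire, defined next: the load of the thread-local polling word must carry acquire semantics so that no later load, in particular one of the global SafepointSynchronize::_state, can be satisfied ahead of it. A simplified standalone C++ illustration of that property, not of the actual transition logic (polling_word, global_state and the function names are hypothetical):

#include <atomic>
#include <cstdint>

// Stand-ins for the thread-local polling word and the global safepoint
// state flag.
static std::atomic<uintptr_t> polling_word{0};
static std::atomic<int>       global_state{0};   // 0 = not synchronizing

// The local poll. Because this load is an acquire, any later load in the
// caller (such as the read of global_state below) cannot be reordered
// ahead of it.
static bool local_poll_armed() {
  return (polling_word.load(std::memory_order_acquire) & 1) != 0;
}

bool needs_safepoint_slow_path() {
  if (local_poll_armed()) {
    return true;                       // take the slow path
  }
  // The acquire above guarantees this read was not hoisted before the poll,
  // so we cannot see a stale "synchronizing" value here after the poll
  // itself reported that no safepoint is pending.
  return global_state.load(std::memory_order_relaxed) != 0;
}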
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} + -+ // Profile the not-null value's klass. -+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); + -+ // Do the check. -+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } + -+ // Profile the failure of the check. -+ profile_typecheck_failed(x12); // blows x12 ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); +} + -+// Java Expression Stack ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } + -+void InterpreterMacroAssembler::pop_ptr(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, wordSize); -+} ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); + -+void InterpreterMacroAssembler::pop_i(Register r) { -+ lw(r, Address(esp, 0)); // lw do signed extended -+ addi(esp, esp, wordSize); -+} ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + -+void InterpreterMacroAssembler::pop_l(Register r) { -+ ld(r, Address(esp, 0)); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); -+} ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); + -+void InterpreterMacroAssembler::push_ptr(Register r) { -+ addi(esp, esp, -wordSize); -+ sd(r, Address(esp, 0)); -+} ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); + -+void InterpreterMacroAssembler::push_i(Register r) { -+ addi(esp, esp, -wordSize); -+ addw(r, r, zr); // signed extended -+ sd(r, Address(esp, 0)); -+} ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); + -+void InterpreterMacroAssembler::push_l(Register r) { -+ addi(esp, esp, -2 * wordSize); -+ sd(zr, Address(esp, wordSize)); -+ sd(r, Address(esp)); -+} ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); + -+void InterpreterMacroAssembler::pop_f(FloatRegister r) { -+ flw(r, esp, 0); -+ addi(esp, esp, wordSize); -+} ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); + -+void InterpreterMacroAssembler::pop_d(FloatRegister r) { -+ fld(r, esp, 0); -+ addi(esp, esp, 2 * Interpreter::stackElementSize); 
-+} ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); + -+void InterpreterMacroAssembler::push_f(FloatRegister r) { -+ addi(esp, esp, -wordSize); -+ fsw(r, Address(esp, 0)); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); ++ } +} + -+void InterpreterMacroAssembler::push_d(FloatRegister r) { -+ addi(esp, esp, -2 * wordSize); -+ fsd(r, Address(esp, 0)); ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); +} + -+void InterpreterMacroAssembler::pop(TosState state) { -+ switch (state) { -+ case atos: -+ pop_ptr(); -+ verify_oop(x10); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ pop_i(); -+ break; -+ case ltos: -+ pop_l(); -+ break; -+ case ftos: -+ pop_f(); -+ break; -+ case dtos: -+ pop_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + -+void InterpreterMacroAssembler::push(TosState state) { -+ switch (state) { -+ case atos: -+ verify_oop(x10); -+ push_ptr(); -+ break; -+ case btos: // fall through -+ case ztos: // fall through -+ case ctos: // fall through -+ case stos: // fall through -+ case itos: -+ push_i(); -+ break; -+ case ltos: -+ push_l(); -+ break; -+ case ftos: -+ push_f(); -+ break; -+ case dtos: -+ push_d(); -+ break; -+ case vtos: -+ /* nothing to do */ -+ break; -+ default: -+ ShouldNotReachHere(); ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } ++ ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); + } -+} ++ BLOCK_COMMENT("verify_oop {"); + -+// Helpers for swap and dup -+void InterpreterMacroAssembler::load_ptr(int n, Register val) { -+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+void InterpreterMacroAssembler::store_ptr(int n, Register val) { -+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); -+} ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. 
++ movptr(t0, (address)b); + -+void InterpreterMacroAssembler::load_float(Address src) { -+ flw(f10, src); -+} ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); + -+void InterpreterMacroAssembler::load_double(Address src) { -+ fld(f10, src); -+} ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { -+ // set sender sp -+ mv(x30, sp); -+ // record last_sp -+ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ BLOCK_COMMENT("} verify_oop"); +} + -+// Jump to from_interpreted entry of a call unless single stepping is possible -+// in this thread in which case we must call the i2i entry -+void InterpreterMacroAssembler::jump_from_interpreted(Register method) { -+ prepare_to_jump_from_interpreted(); -+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. -+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(t0, run_compiled_code); -+ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ jr(t0); -+ bind(run_compiled_code); ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; + } + -+ ld(t0, Address(method, Method::from_interpreted_offset())); -+ jr(t0); -+} -+ -+// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. amd64 does not do this. -+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { -+} ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop_addr {"); + -+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { -+ dispatch_next(state, step); -+} ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + -+void InterpreterMacroAssembler::dispatch_base(TosState state, -+ address* table, -+ bool verifyoop, -+ bool generate_poll, -+ Register Rs) { -+ // Pay attention to the argument Rs, which is acquiesce in t0. -+ if (VerifyActivationFrameSize) { -+ Unimplemented(); -+ } -+ if (verifyoop && state == atos) { -+ verify_oop(x10); ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); + } + -+ Label safepoint; -+ address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. 
++ movptr(t0, (address)b); + -+ if (needs_thread_local_poll) { -+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ andi(t1, t1, SafepointMechanism::poll_bit()); -+ bnez(t1, safepoint); -+ } -+ if (table == Interpreter::dispatch_table(state)) { -+ li(t1, Interpreter::distance_from_dispatch_table(state)); -+ add(t1, Rs, t1); -+ shadd(t1, t1, xdispatch, t1, 3); ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); ++ ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ BLOCK_COMMENT("} verify_oop_addr"); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); + } else { -+ mv(t1, (address)table); -+ shadd(t1, Rs, t1, Rs, 3); ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); + } -+ ld(t1, Address(t1)); -+ jr(t1); ++} + -+ if (needs_thread_local_poll) { -+ bind(safepoint); -+ la(t1, ExternalAddress((address)safepoint_table)); -+ shadd(t1, Rs, t1, Rs, 3); -+ ld(t1, Address(t1)); -+ jr(t1); ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016lx", pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", 
regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); ++ BREAKPOINT; ++ } + } ++ fatal("DEBUG MESSAGE: %s", msg); +} + -+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) { -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs); -+} ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. + -+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), Rs); -+} ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); + -+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) { -+ dispatch_base(state, Interpreter::normal_table(state), false, Rs); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); ++ ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); +} + -+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode -+ load_unsigned_byte(t0, Address(xbcp, step)); -+ add(xbcp, xbcp, step); -+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ pusha(); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); +} + -+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { -+ // load current bytecode -+ lbu(t0, Address(xbcp, 0)); -+ dispatch_base(state, table); ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); +} + -+// remove activation -+// -+// Apply stack watermark barrier. -+// Unlock the receiver if this is a synchronized method. -+// Unlock any Java monitors from syncronized blocks. -+// Remove the activation from the stack. 
-+// -+// If there are locked Java monitors -+// If throw_monitor_exception -+// throws IllegalMonitorStateException -+// Else if install_monitor_exception -+// installs IllegalMonitorStateException -+// Else -+// no error processing -+void InterpreterMacroAssembler::remove_activation( -+ TosState state, -+ bool throw_monitor_exception, -+ bool install_monitor_exception, -+ bool notify_jvmdi) { -+ // Note: Registers x13 may be in use for the -+ // result check if synchronized method -+ Label unlocked, unlock, no_unlock; ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. + -+ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -+ // that would normally not be safe to use. Such bad returns into unsafe territory of -+ // the stack, will call InterpreterRuntime::at_unwind. -+ Label slow_path; -+ Label fast_path; -+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -+ j(fast_path); -+ -+ bind(slow_path); -+ push(state); -+ set_last_Java_frame(esp, fp, (address)pc(), t0); -+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -+ reset_last_Java_frame(true); -+ pop(state); ++ mov_metadata(xmethod, (Metadata*)NULL); + -+ bind(fast_path); ++ // Jump to the entry point of the i2c stub. ++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); ++} + -+ // get the value of _do_not_unlock_if_synchronized into x13 -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ lbu(x13, do_not_unlock_if_synchronized); -+ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ int32_t offset = 0; ++ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp ++} + -+ // get method access flags -+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ ld(x12, Address(x11, Method::access_flags_offset())); -+ andi(t0, x12, JVM_ACC_SYNCHRONIZED); -+ beqz(t0, unlocked); ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} + -+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag -+ // is set. -+ bnez(x13, no_unlock); ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); ++} + -+ // unlock monitor -+ push(state); // save result ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); ++} + -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object has -+ // not been unlocked by an explicit monitorexit bytecode. 
-+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * -+ wordSize - (int) sizeof(BasicObjectLock)); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ la(c_rarg1, monitor); // address of first monitor ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); ++} + -+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ bnez(x10, unlock); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} + -+ pop(state); -+ if (throw_monitor_exception) { -+ // Entry already unlocked, need to throw exception -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ should_not_reach_here(); -+ } else { -+ // Monitor already unlocked during a stack unroll. If requested, -+ // install an illegal_monitor_state_exception. Continue with -+ // stack unrolling. -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::new_illegal_monitor_state_exception)); -+ } -+ j(unlocked); -+ } ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + -+ bind(unlock); -+ unlock_object(c_rarg1); -+ pop(state); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} + -+ // Check that for block-structured locking (i.e., that all locked -+ // objects has been unlocked) -+ bind(unlocked); ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} + -+ // x10: Might contain return value ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++} + -+ // Check that all monitors are unlocked -+ { -+ Label loop, exception, entry, restart; -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} + -+ bind(restart); -+ // We use c_rarg1 so that if we go slow path it will be the correct -+ // register for unlock_object to pass to VM directly -+ ld(c_rarg1, monitor_block_top); // points to current entry, starting -+ // with top-most entry -+ la(x9, monitor_block_bot); // points to word before bottom of -+ // monitor block 
++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); ++ } ++} + -+ j(entry); ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); ++} + -+ // Entry already locked, need to throw exception -+ bind(exception); ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); ++} + -+ if (throw_monitor_exception) { -+ // Throw exception -+ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime:: -+ throw_illegal_monitor_state_exception)); ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); ++} + -+ should_not_reach_here(); -+ } else { -+ // Stack unrolling. Unlock object and install illegal_monitor_exception. -+ // Unlock does not block, so don't have to worry about the frame. -+ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); ++} + -+ push(state); -+ unlock_object(c_rarg1); -+ pop(state); ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); ++} + -+ if (install_monitor_exception) { -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ new_illegal_monitor_state_exception)); -+ } ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} + -+ j(restart); -+ } ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); ++} + -+ bind(loop); -+ // check if current entry is used -+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); -+ ld(t0, Address(t0, 0)); -+ bnez(t0, exception); ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); ++} + -+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry -+ bind(entry); -+ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); ++} ++ ++void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); + } ++} + -+ bind(no_unlock); ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); ++} + -+ // jvmti support -+ if (notify_jvmdi) { -+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} + -+ } else { -+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); + } ++} + -+ // remove activation -+ // get sender esp -+ ld(t1, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ if (StackReservedPages > 0) { -+ // testing if reserved zone needs to be re-enabled -+ Label no_reserved_zone_enabling; ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); ++} + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ ble(t1, t0, no_reserved_zone_enabling); ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); ++} + -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_delayed_StackOverflowError)); -+ should_not_reach_here(); ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); ++} + -+ bind(no_reserved_zone_enabling); -+ } ++void 
MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); ++} + -+ // restore sender esp -+ mv(esp, t1); ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); ++} + -+ // remove frame anchor -+ leave(); -+ // If we're returning to interpreted code we will shortly be -+ // adjusting SP to allow some space for ESP. If we're returning to -+ // compiled code the saved sender SP was saved in sender_sp, so this -+ // restores it. -+ andi(sp, esp, -16); ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); ++ } else { ++ movptr(Rd, dest); ++ } +} + -+// Lock object -+// -+// Args: -+// c_rarg1: BasicObjectLock to be used for locking -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::lock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); -+ if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); -+ } else { -+ Label done; -+ -+ const Register swap_reg = x10; -+ const Register tmp = c_rarg2; -+ const Register obj_reg = c_rarg3; // Will contain the oop ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ switch (adr.getMode()) { ++ case Address::literal: { ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++ if (rtype == relocInfo::none) { ++ mv(Rd, (intptr_t)(adr.target())); ++ } else { ++ relocate(adr.rspec()); ++ movptr(Rd, adr.target()); ++ } ++ break; ++ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); -+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); -+ const int mark_offset = lock_offset + -+ BasicLock::displaced_header_offset_in_bytes(); ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} + -+ Label slow_case; ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ + -+ // Load object pointer into obj_reg c_rarg3 -+ ld(obj_reg, Address(lock_reg, obj_offset)); ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); + -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ load_klass(tmp, obj_reg); -+ lwu(tmp, Address(tmp, Klass::access_flags_offset())); -+ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -+ bnez(tmp, slow_case); -+ } ++#undef INSN + -+ // Load (object->mark() | 1) into swap_reg -+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ ori(swap_reg, t0, 1); ++// Float compare branch instructions + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ sd(swap_reg, Address(lock_reg, mark_offset)); ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, 
FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } + -+ assert(lock_offset == 0, -+ "displached header must be first word in BasicObjectLock"); ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); + -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++#undef INSN + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 7) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (7 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 3 bits clear. -+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg -+ sub(swap_reg, swap_reg, sp); -+ li(t0, (int64_t)(7 - os::vm_page_size())); -+ andr(swap_reg, swap_reg, t0); + -+ // Save the test result, for recursive case, the result is zero -+ sd(swap_reg, Address(lock_reg, mark_offset)); -+ beqz(swap_reg, done); ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ /* jump if no NaN in source and condition is expected */ \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } + -+ bind(slow_case); ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); + -+ // Call the runtime routine for slow case -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), -+ lock_reg); ++#undef INSN + -+ bind(done); ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ + } -+} + ++ INSN(bgt, blt); ++ INSN(bge, ble); + -+// Unlocks an object. Used in monitorexit bytecode and -+// remove_activation. Throws an IllegalMonitorException if object is -+// not locked by current thread. -+// -+// Args: -+// c_rarg1: BasicObjectLock for lock -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) -+// t0, t1 (temp regs) -+void InterpreterMacroAssembler::unlock_object(Register lock_reg) -+{ -+ assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); ++#undef INSN + -+ if (UseHeavyMonitors) { -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ } else { -+ Label done; + -+ const Register swap_reg = x10; -+ const Register header_reg = c_rarg2; // Will contain the old oopMark -+ const Register obj_reg = c_rarg3; // Will contain the oop ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } + -+ save_bcp(); // Save in case of exception ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); + -+ // Convert from BasicObjectLock structure to object and BasicLock -+ // structure Store the BasicLock address into x10 -+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++#undef INSN + -+ // Load oop into obj_reg(c_rarg3) -+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); ++} + -+ // Free entry -+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ ++ } + -+ // Load the old header from BasicLock structure -+ ld(header_reg, Address(swap_reg, -+ BasicLock::displaced_header_offset_in_bytes())); ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); + -+ // Test for recursion -+ beqz(header_reg, done); ++#undef INSN + -+ // Atomic swap back the old header -+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ ++ } + -+ // Call the runtime routine for slow case. -+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); + -+ bind(done); ++#undef INSN + -+ restore_bcp(); ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ + } -+} + ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); + -+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, -+ Label& zero_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ beqz(mdp, zero_continue); -+} ++#undef INSN + -+// Set the method data pointer for the current bcp. -+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Label set_mdp; -+ push_reg(0xc00, sp); // save x10, x11 ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ ++ } + -+ // Test MDO to avoid the call if it is NULL. -+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ beqz(x10, set_mdp); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); -+ // x10: mdi -+ // mdo is guaranteed to be non-zero here, we checked for it before the call. 
-+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); -+ add(x10, x11, x10); -+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+ bind(set_mdp); -+ pop_reg(0xc00, sp); -+} ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); + -+void InterpreterMacroAssembler::verify_method_data_pointer() { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+#ifdef ASSERT -+ Label verify_continue; -+ add(sp, sp, -4 * wordSize); -+ sd(x10, Address(sp, 0)); -+ sd(x11, Address(sp, wordSize)); -+ sd(x12, Address(sp, 2 * wordSize)); -+ sd(x13, Address(sp, 3 * wordSize)); -+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue -+ get_method(x11); ++#undef INSN + -+ // If the mdp is valid, it will point to a DataLayout header which is -+ // consistent with the bcp. The converse is highly probable also. -+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); -+ ld(t0, Address(x11, Method::const_offset())); -+ add(x12, x12, t0); -+ la(x12, Address(x12, ConstMethod::codes_offset())); -+ beq(x12, xbcp, verify_continue); -+ // x10: method -+ // xbcp: bcp // xbcp == 22 -+ // x13: mdp -+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), -+ x11, xbcp, x13); -+ bind(verify_continue); -+ ld(x10, Address(sp, 0)); -+ ld(x11, Address(sp, wordSize)); -+ ld(x12, Address(sp, 2 * wordSize)); -+ ld(x13, Address(sp, 3 * wordSize)); -+ add(sp, sp, 4 * wordSize); -+#endif // ASSERT ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); +} + -+ -+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, -+ int constant, -+ Register value) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ Address data(mdp_in, constant); -+ sd(value, data); ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); +} + ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ int constant, -+ bool decrement) { -+ increment_mdp_data_at(mdp_in, noreg, constant, decrement); -+} ++ INSN(fsrmi); ++ INSN(fsflagsi); + -+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, -+ Register reg, -+ int constant, -+ bool decrement) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ // %%% this does 64bit counters at best it is wasting space -+ // at worst it is a rare bug when counters overflow ++#undef INSN + -+ assert_different_registers(t1, t0, mdp_in, reg); ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); ++} + -+ Address addr1(mdp_in, constant); -+ Address addr2(t1, 0); -+ Address &addr = addr1; -+ if (reg != noreg) { -+ la(t1, addr1); -+ add(t1, t1, reg); -+ addr = addr2; -+ } ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); ++} + -+ if (decrement) { -+ ld(t0, addr); -+ addi(t0, t0, -DataLayout::counter_increment); -+ Label L; -+ bltz(t0, L); // skip store if counter underflow -+ sd(t0, addr); -+ bind(L); -+ } else { -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ ld(t0, addr); -+ addi(t0, t0, DataLayout::counter_increment); -+ Label L; -+ blez(t0, L); // skip store if counter overflow -+ sd(t0, addr); -+ bind(L); ++int 
MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; + } ++ return count; +} + -+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, -+ int flag_byte_constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ int flags_offset = in_bytes(DataLayout::flags_offset()); -+ // Set the flag -+ lbu(t1, Address(mdp_in, flags_offset)); -+ ori(t1, t1, flag_byte_constant); -+ sb(t1, Address(mdp_in, flags_offset)); -+} ++// Push integer registers in the bitset supplied. Don't push sp. ++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; + -+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, -+ int offset, -+ Register value, -+ Register test_value_out, -+ Label& not_equal_continue) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ if (test_value_out == noreg) { -+ ld(t1, Address(mdp_in, offset)); -+ bne(value, t1, not_equal_continue); -+ } else { -+ // Put the test value into a register, so caller can use it: -+ ld(test_value_out, Address(mdp_in, offset)); -+ bne(value, test_value_out, not_equal_continue); ++ if (count) { ++ addi(stack, stack, -count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed++;) + } -+} -+ + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ ld(t1, Address(mdp_in, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ assert(words_pushed == count, "oops, pushed != count"); + -+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, -+ Register reg, -+ int offset_of_disp) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ add(t1, mdp_in, reg); -+ ld(t1, Address(t1, offset_of_disp)); -+ add(mdp_in, mdp_in, t1); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ return count; +} + ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + -+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, -+ int constant) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, (unsigned)constant); -+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); -+} ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 
0 : wordSize; + ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped++;) ++ } + -+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { -+ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); ++ } ++ assert(words_popped == count, "oops, popped != count"); + -+ // save/restore across call_VM -+ addi(sp, sp, -2 * wordSize); -+ sd(zr, Address(sp, 0)); -+ sd(return_bci, Address(sp, wordSize)); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), -+ return_bci); -+ ld(zr, Address(sp, 0)); -+ ld(return_bci, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); ++ return count; +} + -+void InterpreterMacroAssembler::profile_taken_branch(Register mdp, -+ Register bumped_count) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++// Push floating-point registers in the bitset supplied. ++// Return the number of words pushed ++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ DEBUG_ONLY(int words_pushed = 0;) ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); + -+ // If no method data exists, go to profile_continue. -+ // Otherwise, assign to mdp -+ test_method_data_pointer(mdp, profile_continue); ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } + -+ // We are taking a branch. Increment the taken count. -+ Address data(mdp, in_bytes(JumpData::taken_offset())); -+ ld(bumped_count, data); -+ assert(DataLayout::counter_increment == 1, -+ "flow-free idiom only works with 1"); -+ addi(bumped_count, bumped_count, DataLayout::counter_increment); -+ Label L; -+ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; -+ blez(bumped_count, L); // skip store if counter overflow, -+ sd(bumped_count, data); -+ bind(L); -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); -+ bind(profile_continue); ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ DEBUG_ONLY(words_pushed++;) + } -+} + -+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ return count; ++} + -+ // We are taking a branch. Increment the not taken count. 
-+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ DEBUG_ONLY(int words_popped = 0;) ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); + -+ // The method data pointer needs to be updated to correspond to -+ // the next bytecode -+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); -+ bind(profile_continue); ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ DEBUG_ONLY(words_popped++;) + } -+} -+ -+void InterpreterMacroAssembler::profile_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } + -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); -+ bind(profile_continue); -+ } ++ return count; +} + -+void InterpreterMacroAssembler::profile_final_call(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // We are making a call. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); ++ // Push float registers f0-f7, f10-f17, f28-f31. ++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++))); ++ } + } +} + -+ -+void InterpreterMacroAssembler::profile_virtual_call(Register receiver, -+ Register mdp, -+ Register reg2, -+ bool receiver_can_be_null) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ Label skip_receiver_profile; -+ if (receiver_can_be_null) { -+ Label not_null; -+ // We are making a call. Increment the count for null receiver. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ j(skip_receiver_profile); -+ bind(not_null); ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset++))); + } ++ } ++ addi(sp, sp, wordSize * 20); + -+ // Record the receiver type. 
-+ record_klass_in_profile(receiver, mdp, reg2, true); -+ bind(skip_receiver_profile); -+ -+ // The method data pointer needs to be updated to reflect the new target. ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} + -+ update_mdp_by_constant(mdp, -+ in_bytes(VirtualCallData:: -+ virtual_call_data_size())); -+ bind(profile_continue); -+ } ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(RegSet::of(x1) + RegSet::range(x5, x31), sp); +} + -+// This routine creates a state machine for updating the multi-row -+// type profile at a virtual call site (or other type-sensitive bytecode). -+// The machine visits each row (of receiver/count) until the receiver type -+// is found, or until it runs out of rows. At the same time, it remembers -+// the location of the first empty row. (An empty row records null for its -+// receiver, and can be allocated for a newly-observed receiver type.) -+// Because there are two degrees of freedom in the state, a simple linear -+// search will not work; it must be a decision tree. Hence this helper -+// function is recursive, to generate the required tree structured code. -+// It's the interpreter, so we are trading off code space for speed. -+// See below for example code. -+void InterpreterMacroAssembler::record_klass_in_profile_helper( -+ Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call) { -+ if (TypeProfileWidth == 0) { -+ if (is_virtual_call) { -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ } ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(RegSet::of(x1) + RegSet::range(x5, x31), sp); ++} + -+ } else { -+ int non_profiled_offset = -1; -+ if (is_virtual_call) { -+ non_profiled_offset = in_bytes(CounterData::count_offset()); -+ } ++void MacroAssembler::push_CPU_state() { ++ CompressibleRegion cr(this); ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(RegSet::range(x5, x31), sp); + -+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, -+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } +} + -+void InterpreterMacroAssembler::record_item_in_profile_helper( -+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { -+ int last_row = total_rows - 1; -+ assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the item and for null. -+ // Take any of three different outcomes: -+ // 1. found item => increment count and goto done -+ // 2. found null => keep looking for case 1, maybe allocate this cell -+ // 3. found something else => keep looking for cases 1 and 2 -+ // Case 3 is handled by a recursive call. -+ for (int row = start_row; row <= last_row; row++) { -+ Label next_test; -+ bool test_for_null_also = (row == start_row); -+ -+ // See if the item is item[n]. -+ int item_offset = in_bytes(item_offset_fn(row)); -+ test_mdp_data_at(mdp, item_offset, item, -+ (test_for_null_also ? 
reg2 : noreg), -+ next_test); -+ // (Reg2 now contains the item from the CallData.) -+ -+ // The item is item[n]. Increment count[n]. -+ int count_offset = in_bytes(item_count_offset_fn(row)); -+ increment_mdp_data_at(mdp, count_offset); -+ j(done); -+ bind(next_test); -+ -+ if (test_for_null_also) { -+ Label found_null; -+ // Failed the equality check on item[n]... Test for null. -+ if (start_row == last_row) { -+ // The only thing left to do is handle the null case. -+ if (non_profiled_offset >= 0) { -+ beqz(reg2, found_null); -+ // Item did not match any saved item and there is no empty row for it. -+ // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, non_profiled_offset); -+ j(done); -+ bind(found_null); -+ } else { -+ bnez(reg2, done); -+ } -+ break; -+ } -+ // Since null is rare, make it be the branch-taken case. -+ beqz(reg2, found_null); -+ -+ // Put all the "Case 3" tests here. -+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, -+ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++void MacroAssembler::pop_CPU_state() { ++ CompressibleRegion cr(this); + -+ // Found a null. Keep searching for a matching item, -+ // but remember that this is an empty (unused) slot. -+ bind(found_null); -+ } ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); + } ++ addi(sp, sp, 32 * wordSize); + -+ // In the fall-through case, we found no matching item, but we -+ // observed the item[start_row] is NULL. -+ // Fill in the item field and increment the count. -+ int item_offset = in_bytes(item_offset_fn(start_row)); -+ set_mdp_data_at(mdp, item_offset, item); -+ int count_offset = in_bytes(item_count_offset_fn(start_row)); -+ mv(reg2, DataLayout::counter_increment); -+ set_mdp_data_at(mdp, count_offset, reg2); -+ if (start_row > 0) { -+ j(done); -+ } ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(RegSet::range(x5, x31), sp); +} + -+// Example state machine code for three profile rows: -+// # main copy of decision tree, rooted at row[1] -+// if (row[0].rec == rec) then [ -+// row[0].incr() -+// goto done -+// ] -+// if (row[0].rec != NULL) then [ -+// # inner copy of decision tree, rooted at row[1] -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[1].rec != NULL) then [ -+// # degenerate decision tree, rooted at row[2] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// if (row[2].rec != NULL) then [ -+// count.incr() -+// goto done -+// ] # overflow -+// row[2].init(rec) -+// goto done -+// ] else [ -+// # remember row[1] is empty -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[1].init(rec) -+// goto done -+// ] -+// else [ -+// # remember row[0] is empty -+// if (row[1].rec == rec) then [ -+// row[1].incr() -+// goto done -+// ] -+// if (row[2].rec == rec) then [ -+// row[2].incr() -+// goto done -+// ] -+// row[0].init(rec) -+// goto done -+// ] -+// done: -+ -+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, -+ Register mdp, Register reg2, -+ bool is_virtual_call) { -+ assert(ProfileInterpreter, "must be profiling"); -+ Label done; -+ -+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ 
Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction ++} + -+ bind(done); ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction +} + -+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ // Update the total ret count. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} + -+ for (uint row = 0; row < RetData::row_limit(); row++) { -+ Label next_test; ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; ++} + -+ // See if return_bci is equal to bci[n]: -+ test_mdp_data_at(mdp, -+ in_bytes(RetData::bci_offset(row)), -+ return_bci, noreg, -+ next_test); ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} + -+ // return_bci is equal to bci[n]. Increment the count. -+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} + -+ // The method data pointer needs to be updated to reflect the new target. -+ update_mdp_by_offset(mdp, -+ in_bytes(RetData::bci_displacement_offset(row))); -+ j(profile_continue); -+ bind(next_test); -+ } ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; ++} + -+ update_mdp_for_ret(return_bci); ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} + -+ bind(profile_continue); ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. 
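++// Pc-relative forms (jal, conditional branch, auipc pair) are re-encoded with
++// the new offset; movptr and li32 sequences are re-split with the absolute
++// target value instead.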
++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif ++ ShouldNotReachHere(); ++ return -1; + } +} + -+void InterpreterMacroAssembler::profile_null_seen(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return address(((uintptr_t)insn_addr + offset)); ++} + -+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. ++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ narrowOop n = CompressedOops::encode((oop)o); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); ++ } ++ ShouldNotReachHere(); ++ return -1; ++} + -+ // The method data pointer needs to be updated. 
-+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, Universe::narrow_ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); + } -+ update_mdp_by_constant(mdp, mdp_delta); -+ -+ bind(profile_continue); + } +} + -+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { -+ if (ProfileInterpreter && TypeProfileCasts) { -+ Label profile_continue; ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ relocate(dest.rspec()); ++ movptr(Rd, dest.target()); ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); ++ } else { ++ mv(Rd, src.as_constant()); ++ } ++} + -+ int count_offset = in_bytes(CounterData::count_offset()); -+ // Back up the address, since we have already bumped the mdp. -+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ // *Decrement* the counter. We expect to see zero or small negatives. -+ increment_mdp_data_at(mdp, count_offset, true); ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} + -+ bind (profile_continue); -+ } ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); +} + -+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} + -+ // The method data pointer needs to be updated. -+ int mdp_delta = in_bytes(BitData::bit_data_size()); -+ if (TypeProfileCasts) { -+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} + -+ // Record the object type. 
-+ record_klass_in_profile(klass, mdp, reg2, false); -+ } -+ update_mdp_by_constant(mdp, mdp_delta); ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} + -+ bind(profile_continue); ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); + } +} + -+void InterpreterMacroAssembler::profile_switch_default(Register mdp) { -+ if (ProfileInterpreter) { -+ Label profile_continue; ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} + -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Update the default case count -+ increment_mdp_data_at(mdp, -+ in_bytes(MultiBranchData::default_count_offset())); -+ -+ // The method data pointer needs to be updated. -+ update_mdp_by_offset(mdp, -+ in_bytes(MultiBranchData:: -+ default_displacement_offset())); -+ -+ bind(profile_continue); ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; + } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); +} + -+void InterpreterMacroAssembler::profile_switch_case(Register index, -+ Register mdp, -+ Register reg2) { -+ if (ProfileInterpreter) { -+ Label profile_continue; -+ -+ // If no method data exists, go to profile_continue. -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Build the base (index * per_case_size_in_bytes()) + -+ // case_array_offset_in_bytes() -+ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); -+ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); -+ Assembler::mul(index, index, reg2); -+ Assembler::add(index, index, t0); -+ -+ // Update the case count -+ increment_mdp_data_at(mdp, -+ index, -+ in_bytes(MultiBranchData::relative_count_offset())); ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); ++} + -+ // The method data pointer need to be updated. 
-+ update_mdp_by_offset(mdp, -+ index, -+ in_bytes(MultiBranchData:: -+ relative_displacement_offset())); ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} + -+ bind(profile_continue); ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); +} + -+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} + -+void InterpreterMacroAssembler::notify_method_entry() { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. 
-+ if (JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_method_entry)); -+ bind(L); ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zero_extend(tmp1, Rd, 32); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ orr(Rd, Rd, tmp1); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); + } ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; + } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ get_method(c_rarg1); -+ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); + } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } + -+void InterpreterMacroAssembler::notify_method_exit( -+ TosState state, NotifyMethodExitMode mode) { -+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to -+ // track stack depth. If it is possible to enter interp_only_mode we add -+ // the code to check if the event should be sent. -+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { -+ Label L; -+ // Note: frame::interpreter_frame_result has a dependency on how the -+ // method result is saved across the call to post_method_exit. If this -+ // is changed then the interpreter_frame_result implementation will -+ // need to be updated too. 
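++  // No Zbb rotate available: compose the rotation from two shifts,
++  // e.g. ror_imm(dst, src, 8) computes (src >> 8) | (src << 56).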
++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} + -+ // template interpreter will leave the result on the top of the stack. -+ push(state); -+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); -+ beqz(x13, L); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); -+ bind(L); -+ pop(state); ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ mv(tmp, imm); ++ andr(Rd, Rn, tmp); + } ++} + -+ { -+ SkipIfEqual skip(this, &DTraceMethodProbes, false); -+ push(state); -+ get_method(c_rarg1); -+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ pop(state); ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ mv(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } + } ++ sd(tmp1, adr); +} + -+ -+// Jump if ((*counter_addr += increment) & mask) satisfies the condition. -+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where) { -+ Label done; -+ if (!preloaded) { -+ lwu(tmp1, counter_addr); ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } -+ add(tmp1, tmp1, increment); -+ sw(tmp1, counter_addr); -+ lwu(tmp2, mask); -+ andr(tmp1, tmp1, tmp2); -+ bnez(tmp1, done); -+ j(*where); // offset is too large so we have to use j instead of beqz here -+ bind(done); ++ beq(trial_klass, tmp, L); +} + -+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments) { -+ // interpreter specific -+ // -+ // Note: No need to save/restore rbcp & rlocals pointer since these -+ // are callee saved registers and no blocking/ GC can happen -+ // in leaf calls. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
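++// When immediate is false the oop is instead loaded from the constant pool,
++// so the value can still be patched while other threads execute this code.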
++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { +#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_leaf_base:" -+ " last_sp != NULL"); -+ bind(L); ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if (!immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // interpreter specific -+ // -+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't -+ // really make a difference for these runtime calls, since they are -+ // slow anyway. Btw., bcp must be saved/restored since it may change -+ // due to GC. -+ save_bcp(); -+#ifdef ASSERT -+ { -+ Label L; -+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ beqz(t0, L); -+ stop("InterpreterMacroAssembler::call_VM_base:" -+ " last_sp != NULL"); -+ bind(L); ++// Move a metadata address into a register. ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); + } -+#endif /* ASSERT */ -+ // super call -+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, -+ entry_point, number_of_arguments, -+ check_exceptions); -+// interpreter specific -+ restore_bcp(); -+ restore_locals(); ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); +} + -+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { -+ assert_different_registers(obj, tmp, t0, mdo_addr.base()); -+ Label update, next, none; -+ -+ verify_oop(obj); -+ -+ bnez(obj, update); -+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); -+ j(next); -+ -+ bind(update); -+ load_klass(obj, obj); -+ -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); // klass seen before, nothing to -+ // do. The unknown bit may have been -+ // set already but no need to check. ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); + -+ andi(t0, obj, TypeEntries::type_unknown); -+ bnez(t0, next); -+ // already unknown. Nothing to do anymore. 
++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); ++ } ++} + -+ ld(t0, mdo_addr); -+ beqz(t0, none); -+ li(tmp, (u1)TypeEntries::null_seen); -+ beq(t0, tmp, none); -+ // There is a chance that the checks above (re-reading profiling -+ // data from memory) fail if another thread has just set the -+ // profiling to this obj's klass -+ ld(t0, mdo_addr); -+ xorr(obj, obj, t0); -+ andi(t0, obj, TypeEntries::type_klass_mask); -+ beqz(t0, next); ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} + -+ // different than before. Cannot keep accurate profile. -+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); -+ j(next); ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++ _masm = NULL; ++} + -+ bind(none); -+ // first time here. Set profile type. -+ sd(obj, mdo_addr); ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} + -+ bind(next); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
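++  // Load the oop the handle points to; tmp may be used by the GC barrier.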
++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); +} + -+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { -+ if (!ProfileInterpreter) { -+ return; ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } ++} + -+ if (MethodData::profile_arguments() || MethodData::profile_return()) { -+ Label profile_continue; -+ -+ test_method_data_pointer(mdp, profile_continue); ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} + -+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} + -+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); -+ if (is_virtual) { -+ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++// Algorithm must match CompressedOops::encode. 
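++// With a zero base only the shift is applied; otherwise anything below the
++// heap base (including NULL) encodes to zero and the rest become
++// (oop - base) >> shift.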
++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); + } else { -+ li(tmp, (u1)DataLayout::call_type_data_tag); -+ bne(t0, tmp, profile_continue); ++ mv(d, s); + } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); ++ } ++ } ++} + -+ // calculate slot step -+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); -+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; -+ -+ // calculate type step -+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); -+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ if (MethodData::profile_arguments()) { -+ Label done, loop, loopEnd, profileArgument, profileReturnType; -+ RegSet pushed_registers; -+ pushed_registers += x15; -+ pushed_registers += x16; -+ pushed_registers += x17; -+ Register mdo_addr = x15; -+ Register index = x16; -+ Register off_to_args = x17; -+ push_reg(pushed_registers, sp); ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. 
++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++} + -+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); -+ mv(t0, TypeProfileArgsLimit); -+ beqz(t0, loopEnd); ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); ++ } ++} + -+ mv(index, zr); // index < TypeProfileArgsLimit -+ bind(loop); -+ bgtz(index, profileReturnType); -+ li(t0, (int)MethodData::profile_return()); -+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false -+ bind(profileReturnType); -+ // If return value type is profiled we may have no argument to profile -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ mv(t1, - TypeStackSlotEntries::per_arg_count()); -+ mul(t1, index, t1); -+ add(tmp, tmp, t1); -+ li(t1, TypeStackSlotEntries::per_arg_count()); -+ add(t0, mdp, off_to_args); -+ blt(tmp, t1, done); ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} + -+ bind(profileArgument); ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ ld(tmp, Address(callee, Method::const_offset())); -+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); -+ // stack offset o (zero based) from the start of the argument -+ // list, for n arguments translates into offset n - o - 1 from -+ // the end of the argument list -+ li(t0, stack_slot_offset0); -+ li(t1, slot_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(t0, mdp, t0); -+ ld(t0, Address(t0)); -+ sub(tmp, tmp, t0); -+ addi(tmp, tmp, -1); -+ Address arg_addr = argument_address(tmp); -+ ld(tmp, arg_addr); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ mv(dst, src); ++ } ++ return; ++ } + -+ li(t0, argument_type_offset0); -+ li(t1, type_step); -+ mul(t1, index, t1); -+ add(t0, t0, t1); -+ add(mdo_addr, mdp, t0); -+ Address mdo_arg_addr(mdo_addr, 0); -+ profile_obj_type(tmp, mdo_arg_addr, t1); ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); -+ addi(off_to_args, off_to_args, to_add); ++ assert_different_registers(src, xbase); ++ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); + -+ // increment index by 1 -+ addi(index, index, 1); -+ li(t1, TypeProfileArgsLimit); -+ blt(index, t1, loop); -+ bind(loopEnd); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); ++ } + -+ if (MethodData::profile_return()) { -+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); -+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); -+ } ++ if (xbase == xheapbase) { reinit_heapbase(); } ++} + -+ add(t0, mdp, off_to_args); -+ bind(done); -+ mv(mdp, t0); ++void 
MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} + -+ // unspill the clobbered registers -+ pop_reg(pushed_registers, sp); ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ if (MethodData::profile_return()) { -+ // We're right after the type profile for the last -+ // argument. tmp is the number of cells left in the -+ // CallTypeData/VirtualCallTypeData to reach its end. Non null -+ // if there's a return to profile. -+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); -+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); -+ } -+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); + } else { -+ assert(MethodData::profile_return(), "either profile call args or call ret"); -+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ mv(dst, src); + } ++ return; ++ } + -+ // mdp points right after the end of the -+ // CallTypeData/VirtualCallTypeData, right after the cells for the -+ // return value type if there's one -+ -+ bind(profile_continue); ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; + } -+} + -+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { -+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); -+ if (ProfileInterpreter && MethodData::profile_return()) { -+ Label profile_continue, done; ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } + -+ test_method_data_pointer(mdp, profile_continue); ++ assert_different_registers(src, xbase); ++ mv(xbase, (intptr_t)Universe::narrow_klass_base()); ++ sub(dst, src, xbase); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); ++ } ++} + -+ if (MethodData::profile_return_jsr292_only()) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} + -+ // If we don't profile all invoke bytecodes we must make sure -+ // it's a bytecode we indeed profile. 
We can't go back to the -+ // begining of the ProfileData we intend to update to check its -+ // type because we're right after it and we don't known its -+ // length -+ Label do_profile; -+ lbu(t0, Address(xbcp, 0)); -+ li(tmp, (u1)Bytecodes::_invokedynamic); -+ beq(t0, tmp, do_profile); -+ li(tmp, (u1)Bytecodes::_invokehandle); -+ beq(t0, tmp, do_profile); -+ get_method(tmp); -+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ bne(t0, t1, profile_continue); -+ bind(do_profile); ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ add(dst, xheapbase, dst); + } -+ -+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); -+ mv(tmp, ret); -+ profile_obj_type(tmp, mdo_ret_addr, t1); -+ -+ bind(profile_continue); ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ mv(dst, src); + } +} + -+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { -+ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); -+ if (ProfileInterpreter && MethodData::profile_parameters()) { -+ Label profile_continue, done; -+ -+ test_method_data_pointer(mdp, profile_continue); -+ -+ // Load the offset of the area within the MDO used for -+ // parameters. If it's negative we're not profiling any parameters -+ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); -+ srli(tmp2, tmp1, 31); -+ bnez(tmp2, profile_continue); // i.e. sign bit set -+ -+ // Compute a pointer to the area for parameters from the offset -+ // and move the pointer to the slot for the last -+ // parameters. Collect profiling from last parameter down. 
-+ // mdo start + parameters offset + array length - 1 -+ add(mdp, mdp, tmp1); -+ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); ++ } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} + -+ Label loop; -+ bind(loop); ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); -+ int type_base = in_bytes(ParametersTypeData::type_offset(0)); -+ int per_arg_scale = exact_log2(DataLayout::cell_size); -+ add(t0, mdp, off_base); -+ add(t1, mdp, type_base); ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} + -+ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); -+ // load offset on the stack from the slot for this parameter -+ ld(tmp2, Address(tmp2, 0)); -+ neg(tmp2, tmp2); ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} + -+ // read the parameter from the local area -+ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); -+ ld(tmp2, Address(tmp2, 0)); ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} + -+ // profile the parameter -+ shadd(t1, tmp1, t1, t0, per_arg_scale); -+ Address arg_type(t1, 0); -+ profile_obj_type(tmp2, arg_type, tmp3); ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+ // go to next parameter -+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); -+ bgez(tmp1, loop); + -+ bind(profile_continue); ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; + } ++ return idivl_offset; +} + -+void InterpreterMacroAssembler::get_method_counters(Register method, -+ Register mcs, Label& skip) { -+ Label has_counters; -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ bnez(mcs, has_counters); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), method); -+ ld(mcs, Address(method, Method::method_counters_offset())); -+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory -+ bind(has_counters); -+} ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. 
The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) + -+#ifdef ASSERT -+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit) { -+ Label L; -+ andi(t0, access_flags, flag_bits); -+ if (stop_by_hit) { -+ beqz(t0, L); ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); + } else { -+ bnez(t0, L); ++ rem(result, rs1, rs2); // result = rs1 % rs2; + } -+ stop(msg); -+ bind(L); -+} -+ -+void InterpreterMacroAssembler::verify_frame_setup() { -+ Label L; -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ ld(t0, monitor_block_top); -+ beq(esp, t0, L); -+ stop("broken stack frame setup in interpreter"); -+ bind(L); ++ return idivq_offset; +} -+#endif -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -new file mode 100644 -index 00000000000..4d8cb086f82 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,285 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP -+#define CPU_RISCV_INTERP_MASM_RISCV_HPP -+ -+#include "asm/macroAssembler.hpp" -+#include "interpreter/invocationCounter.hpp" -+#include "runtime/frame.hpp" + -+// This file specializes the assember with interpreter-specific macros -+ -+typedef ByteSize (*OffsetFunction)(uint); -+ -+class InterpreterMacroAssembler: public MacroAssembler { -+ protected: -+ // Interpreter specific version of call_VM_base -+ using MacroAssembler::call_VM_leaf_base; ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
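
corrected_idivl/corrected_idivq above can emit a bare divw/remw (or div/rem) because RISC-V's M-extension division already returns the dividend and a zero remainder for the MIN_VALUE / -1 overflow case, which is exactly what Java requires; the zero-divisor case is rejected by a separate explicit check before these helpers are reached. A minimal sketch of the required semantics in plain C++ (function names here are illustrative; the C++ overflow case is undefined behaviour, so it is special-cased explicitly):

  #include <cassert>
  #include <climits>

  int java_idiv(int a, int b) {
    assert(b != 0);                        // zero divisor raises ArithmeticException in Java,
                                           // via an explicit check before the divide
    if (a == INT_MIN && b == -1) return INT_MIN;   // wraps, no trap
    return a / b;
  }

  int java_irem(int a, int b) {
    assert(b != 0);
    if (a == INT_MIN && b == -1) return 0;
    return a % b;                          // C++ '%' truncates toward zero, same as Java
  }

  int main() {
    assert(java_idiv(INT_MIN, -1) == INT_MIN);
    assert(java_idiv(-7, 2) == -3);
    assert(java_irem(-7, 2) == -1);        // remainder takes the sign of the dividend
    return 0;
  }
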
++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_tmp); ++ assert_different_registers(method_result, intf_klass, scan_tmp); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when mehtid isn't needed"); ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must be same register for non-constant itable index as for method"); + -+ virtual void call_VM_leaf_base(address entry_point, -+ int number_of_arguments); ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable). ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size_in_bytes(); ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); + -+ virtual void call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions); ++ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); + -+ // base routine for all dispatches -+ void dispatch_base(TosState state, address* table, bool verifyoop = true, -+ bool generate_poll = false, Register Rs = t0); ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); ++ add(scan_tmp, scan_tmp, vtable_base); + -+ public: -+ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} -+ virtual ~InterpreterMacroAssembler() {} ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_register()) { ++ slli(t0, itable_index.as_register(), 3); ++ } else { ++ mv(t0, itable_index.as_constant() << 3); ++ } ++ add(recv_klass, recv_klass, t0); ++ if (itentry_off) { ++ add(recv_klass, recv_klass, itentry_off); ++ } ++ } + -+ void load_earlyret_value(TosState state); ++ Label search, found_method; + -+ void jump_to_entry(address entry); ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ beq(intf_klass, method_result, found_method); ++ bind(search); ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doens't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beqz(method_result, L_no_such_interface, /* is_far */ true); ++ addi(scan_tmp, scan_tmp, scan_step); ++ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); ++ bne(intf_klass, method_result, search); + -+ virtual void check_and_handle_popframe(Register java_thread); -+ virtual void check_and_handle_earlyret(Register java_thread); ++ bind(found_method); + -+ // Interpreter-specific registers -+ void save_bcp() { -+ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ // Got a hit. 
++ if (return_method) { ++ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); ++ add(method_result, recv_klass, scan_tmp); ++ ld(method_result, Address(method_result)); + } ++} + -+ void restore_bcp() { -+ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); -+ } ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, ++ "adjust the scaling in the code below"); ++ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + -+ void restore_locals() { -+ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ if (vtable_index.is_register()) { ++ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ++ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++ } else { ++ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } ++} + -+ void restore_constant_pool_cache() { -+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); -+ } ++void MacroAssembler::membar(uint32_t order_constraint) { ++ address prev = pc() - NativeMembar::instruction_size; ++ address last = code()->last_insn(); + -+ void get_dispatch(); ++ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { ++ NativeMembar *bar = NativeMembar_at(prev); ++ // We are merging two memory barrier instructions. On RISCV we ++ // can do this simply by ORing them together. ++ bar->set_kind(bar->get_kind() | order_constraint); ++ BLOCK_COMMENT("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); + -+ // Helpers for runtime call arguments/results -+ void get_method(Register reg) { -+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); -+ } ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+ void get_const(Register reg) { -+ get_method(reg); -+ ld(reg, Address(reg, in_bytes(Method::const_offset()))); ++ membar_mask_to_pred_succ(order_constraint, predecessor, successor); ++ fence(predecessor, successor); + } ++} + -+ void get_constant_pool(Register reg) { -+ get_const(reg); -+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. 
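
MacroAssembler::membar above folds adjacent barriers: when the previously emitted instruction is already a membar, the new order constraint is OR-ed into it instead of emitting a second fence. A small stand-alone model of that merge (the instruction buffer, bit values and helper names are made up for illustration):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  const uint32_t LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8;

  struct Insn { bool is_membar; uint32_t kind; };
  std::vector<Insn> code;

  void emit_load() { code.push_back({false, 0}); }

  void membar(uint32_t constraint) {
    if (!code.empty() && code.back().is_membar) {
      code.back().kind |= constraint;          // merged membar: union of both constraints
    } else {
      code.push_back({true, constraint});      // a fresh fence instruction
    }
  }

  int main() {
    emit_load();
    membar(LoadLoad | LoadStore);
    membar(StoreStore);                        // folded into the previous fence
    assert(code.size() == 2);
    assert(code.back().kind == (LoadLoad | LoadStore | StoreStore));
    return 0;
  }
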
++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); + } + -+ void get_constant_pool_cache(Register reg) { -+ get_constant_pool(reg); -+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); -+ } -+ -+ void get_cpool_and_tags(Register cpool, Register tags) { -+ get_constant_pool(cpool); -+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); -+ } ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} + -+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); -+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); -+ void get_method_counters(Register method, Register mcs, Label& skip); ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} + -+ // Load cpool->resolved_references(index). -+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); ++void MacroAssembler::safepoint_poll(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); ++ } ++} + -+ // Load cpool->resolved_klass_at(index). 
-+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); + -+ void load_resolved_method_at_index(int byte_no, Register method, Register cache); ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); ++ } ++} + -+ void pop_ptr(Register r = x10); -+ void pop_i(Register r = x10); -+ void pop_l(Register r = x10); -+ void pop_f(FloatRegister r = f10); -+ void pop_d(FloatRegister r = f10); -+ void push_ptr(Register r = x10); -+ void push_i(Register r = x10); -+ void push_l(Register r = x10); -+ void push_f(FloatRegister r = f10); -+ void push_d(FloatRegister r = f10); ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} + -+ void pop(TosState state); // transition vtos -> state -+ void push(TosState state); // transition state -> vtos ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ void empty_expression_stack() { -+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ // NULL last_sp until next java call -+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ // Helpers for swap and dup -+ void load_ptr(int n, Register val); -+ void store_ptr(int n, Register val); + -+ // Load float value from 'address'. The value is loaded onto the FPU register v0. -+ void load_float(Address src); -+ void load_double(Address src); ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); + -+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is -+ // a subtype of super_klass. 
-+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; + -+ // Dispatching -+ void dispatch_prolog(TosState state, int step = 0); -+ void dispatch_epilog(TosState state, int step = 0); -+ // dispatch via t0 -+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); -+ // dispatch normal table via t0 (assume t0 is loaded already) -+ void dispatch_only_normal(TosState state, Register Rs = t0); -+ void dispatch_only_noverify(TosState state, Register Rs = t0); -+ // load t0 from [xbcp + step] and dispatch via t0 -+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); -+ // load t0 from [xbcp] and dispatch via t0 and table -+ void dispatch_via (TosState state, address* table); ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); + -+ // jump to an invoked target -+ void prepare_to_jump_from_interpreted(); -+ void jump_from_interpreted(Register method); ++ andi(aligned_addr, addr, ~3); + ++ if (size == int8) { ++ mv(mask, 0xff); ++ } else { ++ // size == int16 case ++ mv(mask, -1); ++ zero_extend(mask, mask, 16); ++ } ++ sll(mask, mask, shift); + -+ // Returning from interpreted functions -+ // -+ // Removes the current activation (incl. unlocking of monitors) -+ // and sets up the return address. This code is also used for -+ // exception unwindwing. In that case, we do not want to throw -+ // IllegalMonitorStateExceptions, since that might get us into an -+ // infinite rethrow exception loop. -+ // Additionally this code is used for popFrame and earlyReturn. -+ // In popFrame case we want to skip throwing an exception, -+ // installing an exception, and notifying jvmdi. -+ // In earlyReturn case we only want to skip throwing an exception -+ // and installing an exception. -+ void remove_activation(TosState state, -+ bool throw_monitor_exception = true, -+ bool install_monitor_exception = true, -+ bool notify_jvmdi = true); ++ xori(not_mask, mask, -1); + -+ // FIXME: Give us a valid frame at a null check. -+ virtual void null_check(Register reg, int offset = -1) { -+ MacroAssembler::null_check(reg, offset); -+ } ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); + -+ // Object locking -+ void lock_object (Register lock_reg); -+ void unlock_object(Register lock_reg); ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} + -+ // Interpreter profiling operations -+ void set_method_data_pointer_for_bcp(); -+ void test_method_data_pointer(Register mdp, Label& zero_continue); -+ void verify_method_data_pointer(); ++// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. 
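
The cmpxchg_narrow_value that follows implements a byte/short CAS on top of lr.w/sc.w by operating on the containing aligned 32-bit word, using the shift and mask values prepared by cmpxchg_narrow_value_helper above. The bit manipulation, minus the atomicity, looks like this in ordinary C++ (little-endian byte order assumed, as on RISC-V; names are illustrative only):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  bool narrow_cas_byte(uint8_t* addr, uint8_t expected, uint8_t new_val) {
    uintptr_t a        = (uintptr_t)addr;
    uint8_t*  aligned  = (uint8_t*)(a & ~(uintptr_t)3);   // aligned_addr = addr & ~3
    unsigned  shift    = (unsigned)(a & 3) * 8;           // byte position inside the word
    uint32_t  mask     = 0xffu << shift;
    uint32_t  not_mask = ~mask;

    uint32_t old;
    std::memcpy(&old, aligned, 4);                        // stands in for lr.w
    if (((old & mask) >> shift) != expected) {
      return false;                                       // compare failed
    }
    uint32_t merged = (old & not_mask) | ((uint32_t)new_val << shift);
    std::memcpy(aligned, &merged, 4);                     // stands in for sc.w
    return true;
  }

  int main() {
    alignas(4) uint8_t buf[4] = {0x11, 0x22, 0x33, 0x44};
    assert(narrow_cas_byte(&buf[2], 0x33, 0xAA));
    assert(buf[2] == 0xAA && buf[1] == 0x22 && buf[3] == 0x44);  // neighbours untouched
    assert(!narrow_cas_byte(&buf[2], 0x33, 0xBB));               // stale expected value
    return 0;
  }
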
++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ void set_mdp_data_at(Register mdp_in, int constant, Register value); -+ void increment_mdp_data_at(Address data, bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, int constant, -+ bool decrement = false); -+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, -+ bool decrement = false); -+ void increment_mask_and_jump(Address counter_addr, -+ int increment, Address mask, -+ Register tmp1, Register tmp2, -+ bool preloaded, Label* where); ++ Label retry, fail, done; + -+ void set_mdp_flag_at(Register mdp_in, int flag_constant); -+ void test_mdp_data_at(Register mdp_in, int offset, Register value, -+ Register test_value_out, -+ Label& not_equal_continue); ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+ void record_klass_in_profile(Register receiver, Register mdp, -+ Register reg2, bool is_virtual_call); -+ void record_klass_in_profile_helper(Register receiver, Register mdp, -+ Register reg2, -+ Label& done, bool is_virtual_call); -+ void record_item_in_profile_helper(Register item, Register mdp, -+ Register reg2, int start_row, Label& done, int total_rows, -+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, -+ int non_profiled_offset); ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); + -+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); -+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); -+ void update_mdp_by_constant(Register mdp_in, int constant); -+ void update_mdp_for_ret(Register return_bci); ++ if (result_as_bool) { ++ mv(result, 1); ++ j(done); + -+ // narrow int return value -+ void narrow(Register result); ++ bind(fail); ++ mv(result, zr); + -+ void profile_taken_branch(Register mdp, Register bumped_count); -+ void profile_not_taken_branch(Register mdp); -+ void profile_call(Register mdp); -+ void profile_final_call(Register mdp); -+ void profile_virtual_call(Register receiver, Register mdp, -+ Register t1, -+ bool receiver_can_be_null = false); -+ void profile_ret(Register return_bci, Register mdp); -+ void profile_null_seen(Register mdp); -+ void profile_typecheck(Register mdp, Register klass, Register temp); -+ void profile_typecheck_failed(Register mdp); -+ void profile_switch_default(Register mdp); -+ void profile_switch_case(Register index_in_scratch, Register mdp, -+ Register temp); ++ bind(done); ++ } else { ++ andr(tmp, old, mask); + -+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); -+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); -+ void profile_return_type(Register mdp, Register ret, Register tmp); -+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ bind(fail); ++ srl(result, tmp, shift); + -+ // Debugging -+ // only if +VerifyFPU && (state == ftos || state == dtos) -+ void verify_FPU(int 
stack_depth, TosState state = ftos); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } ++ } ++} + -+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. ++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ // support for jvmti/dtrace -+ void notify_method_entry(); -+ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ Label fail, done; + -+ virtual void _call_Unimplemented(address call_site) { -+ save_bcp(); -+ set_last_Java_frame(esp, fp, (address) pc(), t0); -+ MacroAssembler::_call_Unimplemented(call_site); -+ } ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); + -+#ifdef ASSERT -+ void verify_access_flags(Register access_flags, uint32_t flag_bits, -+ const char* msg, bool stop_by_hit = true); -+ void verify_frame_setup(); -+#endif -+}; ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, fail); + -+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -new file mode 100644 -index 00000000000..d93530d8564 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,295 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // Success ++ mv(result, 1); ++ j(done); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/signature.hpp" ++ // Fail ++ bind(fail); ++ mv(result, zr); + -+#define __ _masm-> ++ bind(done); ++} + -+// Implementation of SignatureHandlerGenerator -+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } -+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } -+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); + -+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ return g_INTArgReg[++_num_reg_int_args]; ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); ++ ++ // equal, succeed ++ if (result_as_bool) { ++ mv(result, 1); ++ } else { ++ mv(result, expected); + } -+ return noreg; -+} ++ j(done); + -+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ return g_FPArgReg[_num_reg_fp_args++]; ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); + } else { -+ return fnoreg; ++ mv(result, t0); + } ++ ++ bind(done); +} + -+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { -+ int ret = _stack_offset; -+ _stack_offset += wordSize; -+ return ret; ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ Label fail, done; ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, fail); ++ ++ // Success ++ mv(result, 1); ++ j(done); ++ ++ // Fail ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); +} + -+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( -+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); // allocate on resourse area by default -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; -+ _stack_offset = 0; ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? 
prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ lw(reg, src); -+ } else { -+ __ lw(x10, src); -+ __ sw(x10, Address(to(), next_stack_offset())); -+ } ++#undef ATOMIC_OP ++ ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) + -+ Register reg = next_gpr(); -+ if (reg != noreg) { -+ __ ld(reg, src); -+ } else { -+ __ ld(x10, src); -+ __ sd(x10, Address(to(), next_stack_offset())); -+ } ++#undef ATOMIC_XCHG ++ ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) + -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ flw(reg, src); ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. 
++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x0, tmp, offset); + } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_int(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ j(entry); + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { -+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); -+ -+ FloatRegister reg = next_fpr(); -+ if (reg != fnoreg) { -+ __ fld(reg, src); -+ } else { -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ pass_long(); ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jalr(x1, tmp, offset); // link ++ } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++ jal(entry); // link + } +} + -+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { -+ Register reg = next_gpr(); -+ if (reg == c_rarg1) { -+ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); -+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); -+ } else if (reg != noreg) { -+ // c_rarg2-c_rarg7 -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
-+ __ ld(temp(), x10); -+ Label L; -+ __ beqz(temp(), L); -+ __ mv(reg, x10); -+ __ bind(L); ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { -+ //to stack -+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); -+ __ ld(temp(), x10); -+ Label L; -+ __ bnez(temp(), L); -+ __ mv(x10, zr); -+ __ bind(L); -+ assert(sizeof(jobject) == wordSize, ""); -+ __ sd(x10, Address(to(), next_stack_offset())); ++ assert_different_registers(sub_klass, super_klass, super_check_offset); + } -+} -+ -+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { -+ // generate code to handle arguments -+ iterate(fingerprint); -+ -+ // return result handler -+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); -+ __ ret(); + -+ __ flush(); -+} ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); + ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); + -+// Implementation of SignatureHandlerLibrary ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ + -+void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front fo the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); + ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(tmp_reg, super_check_offset_addr); ++ super_check_offset = tmp_reg; ++ } ++ add(t0, sub_klass, super_check_offset); ++ Address super_check_addr(t0); ++ ld(t0, super_check_addr); // load displayed supertype + -+class SlowSignatureHandler -+ : public NativeSignatureIterator { -+ private: -+ address _from; -+ intptr_t* _to; -+ intptr_t* _int_args; -+ intptr_t* _fp_args; -+ intptr_t* _fp_identifiers; -+ unsigned int _num_reg_int_args; -+ unsigned int _num_reg_fp_args; ++ // Ths check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_Cache and the primary super dispaly elements. ++ // (The 'super_check_addr' can address either, as the case requires.) 
++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). + -+ intptr_t* single_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); -+ _from -= Interpreter::stackElementSize; -+ return from_addr; ++ beq(super_klass, t0, *L_success); ++ mv(t1, sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(super_check_offset, t1, *L_slow_path); ++ } else { ++ bne(super_check_offset, t1, *L_failure, /* is_far */ true); ++ final_jmp(*L_slow_path); + } + -+ intptr_t* double_slot_addr() { -+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); -+ _from -= 2 * Interpreter::stackElementSize; -+ return from_addr; -+ } ++ bind(L_fallthrough); + -+ int pass_gpr(intptr_t value) { -+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { -+ *_int_args++ = value; -+ return _num_reg_int_args++; -+ } -+ return -1; -+ } ++#undef final_jmp ++} + -+ int pass_fpr(intptr_t value) { -+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { -+ *_fp_args++ = value; -+ return _num_reg_fp_args++; -+ } -+ return -1; -+ } ++// Scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register tmp) { ++ Label Lloop, Lexit; ++ beqz(count, Lexit); ++ bind(Lloop); ++ ld(tmp, addr); ++ beq(value, tmp, Lexit); ++ add(addr, addr, wordSize); ++ sub(count, count, 1); ++ bnez(count, Lloop); ++ bind(Lexit); ++} + -+ void pass_stack(intptr_t value) { -+ *_to++ = value; ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); ++ if (tmp2_reg != noreg) { ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ virtual void pass_int() { -+ jint value = *(jint*)single_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + -+ virtual void pass_long() { -+ intptr_t value = *double_slot_addr(); -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ virtual void pass_object() { -+ intptr_t* addr = single_slot_addr(); -+ intptr_t value = *addr == 0 ? 
NULL : (intptr_t)addr; -+ if (pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ // A couple of usefule fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); + -+ virtual void pass_float() { -+ jint value = *(jint*) single_slot_addr(); -+ // a floating-point argument is passed according to the integer calling -+ // convention if no floating-point argument register available -+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { -+ pass_stack(value); -+ } -+ } ++ BLOCK_COMMENT("check_klass_subtype_slow_path"); + -+ virtual void pass_double() { -+ intptr_t value = *double_slot_addr(); -+ int arg = pass_fpr(value); -+ if (0 <= arg) { -+ *_fp_identifiers |= (1ull << arg); // mark as double -+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack -+ pass_stack(value); -+ } -+ } ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connecitons with the input regs. + -+ public: -+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) -+ : NativeSignatureIterator(method) -+ { -+ _from = from; -+ _to = to; ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) + -+ _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; -+ _fp_identifiers = to - 9; -+ *(int*) _fp_identifiers = 0; -+ _num_reg_int_args = (method->is_static() ? 1 : 0); -+ _num_reg_fp_args = 0; ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; + } -+ -+ ~SlowSignatureHandler() -+ { -+ _from = NULL; -+ _to = NULL; -+ _int_args = NULL; -+ _fp_args = NULL; -+ _fp_identifiers = NULL; ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; + } -+}; + ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; ++ } ++ } + -+JRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* current, -+ Method* method, -+ intptr_t* from, -+ intptr_t* to)) -+ methodHandle m(current, (Method*)method); -+ assert(m->is_native(), "sanity check"); ++ push_reg(pushed_registers, sp); + -+ // handle arguments -+ SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate(UCONST64(-1)); ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); + -+ // return result handler -+ return Interpreter::result_handler(m->result_type()); -+JRT_END -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -new file mode 100644 -index 00000000000..05df63ba2ae ---- /dev/null -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT + -+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP -+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); + -+// This is included in the middle of class Interpreter. -+// Do not include files here. ++ // Set t0 to an obvious invalid value, falling through by default ++ mv(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. ++ repne_scan(x15, x10, x12, t0); + -+// native method calls ++ // pop will restore x10, so we should use a temp register to keep its value ++ mv(t1, x10); + -+class SignatureHandlerGenerator: public NativeSignatureIterator { -+ private: -+ MacroAssembler* _masm; -+ unsigned int _num_reg_fp_args; -+ unsigned int _num_reg_int_args; -+ int _stack_offset; ++ // Unspill the temp registers: ++ pop_reg(pushed_registers, sp); + -+ void pass_int(); -+ void pass_long(); -+ void pass_float(); -+ void pass_double(); -+ void pass_object(); ++ bne(t1, t0, *L_failure); + -+ Register next_gpr(); -+ FloatRegister next_fpr(); -+ int next_stack_offset(); ++ // Success. Cache the super we found an proceed in triumph. ++ sd(super_klass, super_cache_addr); + -+ public: -+ // Creation -+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); -+ virtual ~SignatureHandlerGenerator() { -+ _masm = NULL; ++ if (L_success != &L_fallthrough) { ++ j(*L_success); + } + -+ // Code generation -+ void generate(uint64_t fingerprint); -+ -+ // Code generation support -+ static Register from(); -+ static Register to(); -+ static Register temp(); -+}; ++#undef IS_A_TEMP + -+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -new file mode 100644 -index 00000000000..9a6084afa1d ---- /dev/null -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ bind(L_fallthrough); ++} + -+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); ++} + -+private: ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); ++} + -+ // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + -+public: -+ // Each arch must define reset, save, restore -+ // These are used by objects that only care about: -+ // 1 - initializing a new state (thread creation, javaCalls) -+ // 2 - saving a current state (javaCalls) -+ // 3 - restoring an old state (javaCalls) ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code. 
++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; ++ push_reg(saved_regs, sp); + -+ void clear(void) { -+ // clearing _last_Java_sp must be first -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ _last_Java_fp = NULL; -+ _last_Java_pc = NULL; ++ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); ++ jalr(ra); ++ if (thread != c_rarg0) { ++ mv(thread, c_rarg0); + } + -+ void copy(JavaFrameAnchor* src) { -+ // In order to make sure the transition state is valid for "this" -+ // We must clear _last_Java_sp before copying the rest of the new data -+ // -+ // Hack Alert: Temporary bugfix for 4717480/4721647 -+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp -+ // unless the value is changing -+ // -+ assert(src != NULL, "Src should not be NULL."); -+ if (_last_Java_sp != src->_last_Java_sp) { -+ _last_Java_sp = NULL; -+ OrderAccess::release(); -+ } -+ _last_Java_fp = src->_last_Java_fp; -+ _last_Java_pc = src->_last_Java_pc; -+ // Must be last so profiler will always see valid frame if has_last_frame() is true -+ _last_Java_sp = src->_last_Java_sp; -+ } ++ // restore pushed registers ++ pop_reg(saved_regs, sp); ++} + -+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } -+ void make_walkable(JavaThread* thread); -+ void capture_last_Java_pc(void); ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ mv(reg, (uint64_t)byte_map_base); ++} + -+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; + -+ const address last_Java_pc(void) { return _last_Java_pc; } ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+private: ++ relocate(dest.rspec()); ++ // RISC-V doesn't compute a page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2k). 
++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); ++ } ++} + -+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++} + -+public: ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); ++} + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); + -+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -new file mode 100644 -index 00000000000..814ed23e471 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,214 @@ -+/* -+ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "memory/resourceArea.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "runtime/safepoint.hpp" -+ -+#define __ masm-> -+ -+#define BUFFER_SIZE 30*wordSize ++ enter(); // RA and FP are live. 
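A worked example of the arithmetic la_patchable relies on, separate from the patch: auipc contributes a multiple of 0x1000 to the PC and the following instruction adds a sign-extended 12-bit displacement, so a distance d splits into lo = sign-extended low 12 bits and upper = d - lo (equivalently, d + 0x800 with the low 12 bits cleared, which is where the + 0x800 above comes from). Because lo is signed, the usable +/-2G window shrinks by 2K at each end, which is the bound tested above. Standalone sketch:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split a PC-relative distance d into the two parts an auipc-based sequence
// consumes: an upper part that is a multiple of 0x1000 and a signed 12-bit
// low part, with upper + lo == d.
static void split_hi_lo(int64_t d, int64_t& upper, int32_t& lo) {
  // Reach check mirroring la_patchable: the signed low part shrinks the
  // +/-2G range by 2K on each side.
  assert(d >= -((int64_t(1) << 31) + (int64_t(1) << 11)) &&
         d <   ((int64_t(1) << 31) - (int64_t(1) << 11)));
  lo = (int32_t)(d & 0xfff);
  if (lo >= 0x800) { lo -= 0x1000; }   // sign-extend the low 12 bits
  upper = d - lo;                      // what auipc contributes (multiple of 0x1000)
}

int main() {
  const int64_t tests[] = { 0x12345678, -123456789, 0x7ff, 0x800, -0x801 };
  for (int64_t d : tests) {
    int64_t upper; int32_t lo;
    split_hi_lo(d, upper, lo);
    assert(upper % 0x1000 == 0 && upper + lo == d);
    std::printf("d=%lld upper=%lld lo=%d\n", (long long)d, (long long)upper, lo);
  }
  return 0;
}

Running it shows that the two parts always recombine to the original distance, including the d = 0x800 case where the low part goes negative and the upper part rounds up.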
++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); + -+// Instead of issuing a LoadLoad barrier we create an address -+// dependency between loads; this might be more efficient. ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); ++ jalr(x0, t0, offset); ++ should_not_reach_here(); + -+// Common register usage: -+// x10/f10: result -+// c_rarg0: jni env -+// c_rarg1: obj -+// c_rarg2: jfield id ++ bind(no_reserved_zone_enabling); ++} + -+static const Register robj = x13; -+static const Register rcounter = x14; -+static const Register roffset = x15; -+static const Register rcounter_addr = x16; -+static const Register result = x17; ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} + -+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { -+ const char *name; -+ switch (type) { -+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; -+ case T_BYTE: name = "jni_fast_GetByteField"; break; -+ case T_CHAR: name = "jni_fast_GetCharField"; break; -+ case T_SHORT: name = "jni_fast_GetShortField"; break; -+ case T_INT: name = "jni_fast_GetIntField"; break; -+ case T_LONG: name = "jni_fast_GetLongField"; break; -+ case T_FLOAT: name = "jni_fast_GetFloatField"; break; -+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; -+ default: ShouldNotReachHere(); -+ name = NULL; // unreachable -+ } -+ ResourceMark rm; -+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); -+ CodeBuffer cbuf(blob); -+ MacroAssembler* masm = new MacroAssembler(&cbuf); -+ address fast_entry = __ pc(); ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} + -+ Label slow; -+ int32_t offset = 0; -+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); -+ __ addi(rcounter_addr, rcounter_addr, offset); ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); + -+ Address safepoint_counter_addr(rcounter_addr, 0); -+ __ lwu(rcounter, safepoint_counter_addr); -+ // An even value means there are no ongoing safepoint operations -+ __ andi(t0, rcounter, 1); -+ __ bnez(t0, slow); ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); + -+ if (JvmtiExport::can_post_field_access()) { -+ // Using barrier to order wrt. JVMTI check and load of result. 
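atomic_incw above is a load-reserved/store-conditional retry loop: sc_w stores back zero only when no other write hit the location since the lr_w, otherwise the bnez branches back and the whole read-modify-write is retried. A portable sketch of the same retry semantics using std::atomic's compare-exchange (an analogue for illustration, not the hardware primitive):

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Retry-until-success increment, mirroring the lr_w/addw/sc_w/bnez loop.
static void atomic_incw(std::atomic<int>& counter) {
  int old = counter.load(std::memory_order_relaxed);
  while (!counter.compare_exchange_weak(old, old + 1,
                                        std::memory_order_relaxed)) {
    // 'old' now holds the freshly observed value; try again, just as a
    // failed sc_w branches back to retry_load.
  }
}

int main() {
  std::atomic<int> counter{0};
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; t++) {
    threads.emplace_back([&counter] {
      for (int i = 0; i < 100000; i++) { atomic_incw(counter); }
    });
  }
  for (auto& th : threads) { th.join(); }
  std::printf("%d\n", counter.load());   // always 400000
  return 0;
}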
-+ __ membar(MacroAssembler::LoadLoad); ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + -+ // Check to see if a field access watch has been set before we -+ // take the fast path. -+ int32_t offset2; -+ __ la_patchable(result, -+ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -+ offset2); -+ __ lwu(result, Address(result, offset2)); -+ __ bnez(result, slow); -+ -+ __ mv(robj, c_rarg1); ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); + } else { -+ // Using address dependency to order wrt. load of result. -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. ++ beqz(tmp_reg, done); + } + -+ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ Label try_revoke_bias; ++ Label try_rebias; + -+ __ srli(roffset, c_rarg2, 2); // offset ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. + -+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); -+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler -+ __ add(roffset, robj, roffset); ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. 
++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); + -+ switch (type) { -+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; -+ case T_BYTE: __ lb(result, Address(roffset, 0)); break; -+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; -+ case T_SHORT: __ lh(result, Address(roffset, 0)); break; -+ case T_INT: __ lw(result, Address(roffset, 0)); break; -+ case T_LONG: __ ld(result, Address(roffset, 0)); break; -+ case T_FLOAT: { -+ __ flw(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_w(result, f28); // f{31--0}-->x -+ break; ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); + } -+ case T_DOUBLE: { -+ __ fld(f28, Address(roffset, 0)); // f28 as temporaries -+ __ fmv_x_d(result, f28); // d{63--0}-->x -+ break; ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); + } -+ default: ShouldNotReachHere(); + } ++ j(done); + -+ // Using acquire: Order JVMTI check and load of result wrt. succeeding check -+ // (LoadStore for volatile field). -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ -+ __ lw(t0, safepoint_counter_addr); -+ __ bne(rcounter, t0, slow); -+ -+ switch (type) { -+ case T_FLOAT: __ fmv_w_x(f10, result); break; -+ case T_DOUBLE: __ fmv_d_x(f10, result); break; -+ default: __ mv(x10, result); break; -+ } -+ __ ret(); ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. 
In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } + -+ slowcase_entry_pclist[count++] = __ pc(); -+ __ bind(slow); -+ address slow_case_addr; -+ switch (type) { -+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; -+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; -+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; -+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; -+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; -+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; -+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; -+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; -+ default: ShouldNotReachHere(); -+ slow_case_addr = NULL; // unreachable ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } + } ++ j(done); + ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. + { -+ __ enter(); -+ int32_t tmp_offset = 0; -+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); -+ __ jalr(x1, t0, tmp_offset); -+ __ leave(); -+ __ ret(); -+ } -+ __ flush(); -+ -+ return fast_entry; -+} ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); + ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } + -+address JNI_FastGetField::generate_fast_get_boolean_field() { -+ return generate_fast_get_int_field0(T_BOOLEAN); -+} ++ bind(cas_label); + -+address JNI_FastGetField::generate_fast_get_byte_field() { -+ return generate_fast_get_int_field0(T_BYTE); ++ return null_check_offset; +} + -+address JNI_FastGetField::generate_fast_get_char_field() { -+ return generate_fast_get_int_field0(T_CHAR); -+} ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); + -+address JNI_FastGetField::generate_fast_get_short_field() { -+ return generate_fast_get_int_field0(T_SHORT); ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); +} + -+address JNI_FastGetField::generate_fast_get_int_field() { -+ return generate_fast_get_int_field0(T_INT); ++// Move the address of the polling page into dest. ++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } +} + -+address JNI_FastGetField::generate_fast_get_long_field() { -+ return generate_fast_get_int_field0(T_LONG); ++// Read the polling page. The address of the polling page must ++// already be in r. ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); +} + -+address JNI_FastGetField::generate_fast_get_float_field() { -+ return generate_fast_get_int_field0(T_FLOAT); ++// Read the polling page. The address of the polling page must ++// already be in r. 
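The core test in biased_locking_enter above is: XOR the mark word with (prototype header | current thread), mask off the age bits, and a zero result means the object is already biased to this thread in the current epoch; otherwise the surviving bits decide between the CAS, try_rebias, and try_revoke_bias paths. A standalone sketch with made-up bit positions (the real markOopDesc layout differs in detail):

#include <cstdint>
#include <cstdio>

// Made-up mark-word layout for illustration only:
//   [ owner thread (aligned ptr) | epoch:2 | age:4 | biased:1 | lock:2 ]
static const uint64_t kBiasedPattern  = 0x5;          // biased_lock_pattern
static const uint64_t kBiasedLockMask = 0x7;          // biased_lock_mask_in_place
static const uint64_t kAgeMask        = 0xfULL << 3;  // age_mask_in_place
static const uint64_t kEpochMask      = 0x3ULL << 7;  // epoch_mask_in_place

// Mirrors load_prototype_header + orr(xthread) + xorr(mark) + andi(~age_mask).
static uint64_t bias_delta(uint64_t mark, uint64_t prototype, uint64_t thread) {
  return (mark ^ (prototype | thread)) & ~kAgeMask;
}

int main() {
  const uint64_t me        = 0x00007f0000001000ULL;        // pretend JavaThread*
  const uint64_t other     = 0x00007f0000002000ULL;
  const uint64_t prototype = kBiasedPattern | (1ULL << 7);  // biased, epoch 1
  const uint64_t mark      = prototype | me;                // biased to us, fresh epoch

  // Zero delta: already biased to the current thread, nothing to do.
  std::printf("biased to us: %s\n", bias_delta(mark, prototype, me) == 0 ? "yes" : "no");

  // Another owner: delta is nonzero but its low three bits are clear, so the
  // prototype is still biased and the rebias/revoke decisions above apply.
  uint64_t d = bias_delta(prototype | other, prototype, me);
  std::printf("other owner:  delta=0x%llx, pattern still biased: %s\n",
              (unsigned long long)d, (d & kBiasedLockMask) == 0 ? "yes" : "no");

  // Stale epoch: only the epoch bits survive the xor, so try_rebias runs.
  d = bias_delta(mark & ~kEpochMask, prototype, me);
  std::printf("stale epoch:  %s\n", (d & kEpochMask) != 0 ? "yes" : "no");
  return 0;
}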
++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ relocate(rtype); ++ lwu(zr, Address(dest, offset)); +} + -+address JNI_FastGetField::generate_fast_get_double_field() { -+ return generate_fast_get_int_field0(T_DOUBLE); ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ relocate(oop_Relocation::spec(oop_index)); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); +} -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -new file mode 100644 -index 00000000000..83ffcc55d83 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP -+#define CPU_RISCV_JNITYPES_RISCV_HPP + -+#include "jni.h" -+#include "memory/allStatic.hpp" -+#include "oops/oop.hpp" -+ -+// This file holds platform-dependent routines used to write primitive jni -+// types to the array of arguments passed into JavaCalls::call ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + -+class JNITypes : private AllStatic { -+ // These functions write a java primitive type (in native format) -+ // to a java stack slot array to be passed as an argument to JavaCalls:calls. -+ // I.e., they are functionally 'push' operations if they have a 'pos' -+ // formal parameter. Note that jlong's and jdouble's are written -+ // _in reverse_ of the order in which they appear in the interpreter -+ // stack. This is because call stubs (see stubGenerator_sparc.cpp) -+ // reverse the argument list constructed by JavaCallArguments (see -+ // javaCalls.hpp). 
++ narrowKlass nk = Klass::encode_klass(k); ++ relocate(metadata_Relocation::spec(index)); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); ++} + -+public: -+ // Ints are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } -+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } -+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + -+ // Longs are stored in native format in one JavaCallArgument slot at -+ // *(to+1). -+ static inline void put_long(jlong from, intptr_t *to) { -+ *(jlong*) (to + 1) = from; ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } + } + -+ static inline void put_long(jlong from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = from; -+ pos += 2; ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } ++#ifdef ASSERT ++ if (entry.rspec().type() != relocInfo::runtime_call_type) { ++ assert_alignment(pc()); + } -+ -+ static inline void put_long(jlong *from, intptr_t *to, int& pos) { -+ *(jlong*) (to + 1 + pos) = *from; -+ pos += 2; ++#endif ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ jal(entry.target()); ++ } else { ++ jal(pc()); + } ++ // just need to return a non-null address ++ postcond(pc() != badAddress); ++ return pc(); ++} + -+ // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } -+ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ movptr(t1, (address)Universe::non_oop_word()); ++ assert_cond(entry != NULL); ++ return trampoline_call(Address(entry, rh)); ++} + -+ // Floats are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } -+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } -+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++// Emit a trampoline stub for a call to a target which is too far away. 
++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) + -+#undef _JNI_SLOT_OFFSET -+#define _JNI_SLOT_OFFSET 1 -+ // Doubles are stored in native word format in one JavaCallArgument -+ // slot at *(to+1). -+ static inline void put_double(jdouble from, intptr_t *to) { -+ *(jdouble*) (to + 1) = from; ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed + } + -+ static inline void put_double(jdouble from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = from; -+ pos += 2; -+ } ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. + -+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { -+ *(jdouble*) (to + 1 + pos) = *from; -+ pos += 2; -+ } ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + -+ // The get_xxx routines, on the other hand, actually _do_ fetch -+ // java primitive types from the interpreter stack. -+ // No need to worry about alignment on Intel. -+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } -+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } -+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } -+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } -+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } -+#undef _JNI_SLOT_OFFSET -+}; ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); + -+#endif // CPU_RISCV_JNITYPES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -new file mode 100644 -index 00000000000..86710295444 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,4016 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
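For context on the stub above (an aside, not patch content): a direct jal only reaches about +/-1 MiB, while an auipc+jalr pair reaches roughly +/-2 GiB, so when the code cache can exceed jal range (what far_branches() checks) the call site branches to a nearby trampoline that loads the full 64-bit target and jumps through it. A rough reach-classification sketch with simplified end points:

#include <cstdint>
#include <cstdio>

enum class CallKind { direct_jal, auipc_jalr, trampoline };

static const char* name(CallKind k) {
  switch (k) {
    case CallKind::direct_jal: return "jal";
    case CallKind::auipc_jalr: return "auipc+jalr";
    default:                   return "trampoline (load 64-bit target, jr)";
  }
}

// Rough classification of how a call from 'from' to 'to' could be encoded.
static CallKind classify(uint64_t from, uint64_t to) {
  int64_t d = (int64_t)(to - from);
  const int64_t jal_reach   = int64_t(1) << 20;  // +/-1 MiB, 21-bit signed immediate
  const int64_t auipc_reach = int64_t(1) << 31;  // ~ +/-2 GiB via auipc+jalr
  if (d >= -jal_reach && d < jal_reach)     return CallKind::direct_jal;
  if (d >= -auipc_reach && d < auipc_reach) return CallKind::auipc_jalr;
  return CallKind::trampoline;
}

int main() {
  const uint64_t site = 0x100000000ULL;
  struct { const char* what; uint64_t target; } cases[] = {
    { "same code blob",    site + 0x4000 },
    { "far in code cache", site + (uint64_t(200) << 20) },
    { "outside 2 GiB",     site + (uint64_t(3) << 30) },
  };
  for (const auto& c : cases) {
    std::printf("%-18s -> %s\n", c.what, name(classify(site, c.target)));
  }
  return 0;
}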
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); ++ emit_int64((intptr_t)dest); + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/assembler.inline.hpp" -+#include "compiler/disassembler.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/resourceArea.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/accessDecorators.hpp" -+#include "oops/compressedOops.inline.hpp" -+#include "oops/klass.inline.hpp" -+#include "oops/oop.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/jniHandles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/compile.hpp" -+#include "opto/node.hpp" -+#include "opto/output.hpp" -+#endif ++ const address stub_start_addr = addr_at(stub_start_offset); + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) block_comment(str) -+#endif -+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + -+static void pass_arg0(MacroAssembler* masm, Register arg) { -+ if (c_rarg0 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg0, arg); -+ } ++ end_a_stub(); ++ return stub_start_addr; +} + -+static void pass_arg1(MacroAssembler* masm, Register arg) { -+ if (c_rarg1 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg1, arg); ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. 
++ return form_address(t1, dst.base(), dst.offset()); ++ default: ++ la(t1, dst); ++ return Address(t1); + } +} + -+static void pass_arg2(MacroAssembler* masm, Register arg) { -+ if (c_rarg2 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg2, arg); -+ } ++void MacroAssembler::increment(const Address dst, int64_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address increment"); ++ ld(t0, adr); ++ add(t0, t0, value, t1); ++ sd(t0, adr); +} + -+static void pass_arg3(MacroAssembler* masm, Register arg) { -+ if (c_rarg3 != arg) { -+ assert_cond(masm != NULL); -+ masm->mv(c_rarg3, arg); -+ } ++void MacroAssembler::incrementw(const Address dst, int32_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address increment"); ++ lwu(t0, adr); ++ addw(t0, t0, value, t1); ++ sw(t0, adr); +} + -+void MacroAssembler::align(int modulus, int extra_offset) { -+ CompressibleRegion cr(this); -+ while ((offset() + extra_offset) % modulus != 0) { nop(); } ++void MacroAssembler::decrement(const Address dst, int64_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ ld(t0, adr); ++ sub(t0, t0, value, t1); ++ sd(t0, adr); +} + -+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { -+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++void MacroAssembler::decrementw(const Address dst, int32_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ lwu(t0, adr); ++ subw(t0, t0, value, t1); ++ sw(t0, adr); +} + -+// Implementation of call_VM versions -+ -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions) { -+ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); + -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); ++ const int unroll = 8; ++ mv(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); + -+ pass_arg1(this, arg_1); -+ call_VM_helper(oop_result, entry_point, 3, check_exceptions); -+} ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ 
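compute_match_mask above is the classic SWAR "find the byte equal to x" trick, and compute_index then turns the lowest set 0x80/0x8000 bit into an element index with a trailing-zero count. A standalone Latin-1 version, not patch code (the mask values mirror the comment above; a simple byte loop stands in for the ctz):

#include <cstdint>
#include <cstdio>

// Broadcast one byte into all eight lanes of a 64-bit word.
static uint64_t broadcast(uint8_t b) {
  return 0x0101010101010101ULL * b;
}

// Same steps as compute_match_mask with the Latin-1 masks: after xor-ing with
// the broadcast pattern, (v - 0x01..01) & ~(v | 0x7f..7f) has the high bit set
// in a byte lane iff that byte of 'src' equals 'pattern_byte'. Only the lowest
// set 0x80 bit is guaranteed meaningful (borrows can leak into higher lanes),
// which is why only the first occurrence is a valid match index.
static uint64_t match_mask(uint64_t src, uint8_t pattern_byte) {
  uint64_t v = src ^ broadcast(pattern_byte);            // equal bytes become 0x00
  return (v - 0x0101010101010101ULL) & ~(v | 0x7f7f7f7f7f7f7f7fULL);
}

// compute_index analogue: lane of the first matching byte, or -1.
static int first_match_index(uint64_t src, uint8_t pattern_byte) {
  uint64_t m = match_mask(src, pattern_byte);
  if (m == 0) {
    return -1;
  }
  int lane = 0;
  while ((m & 0x80) == 0) {   // trailing-zero count, one byte at a time
    m >>= 8;
    lane++;
  }
  return lane;
}

int main() {
  // Build a word whose little-endian bytes are 'a'..'h'.
  const char* s = "abcdefgh";
  uint64_t word = 0;
  for (int i = 0; i < 8; i++) {
    word |= (uint64_t)(uint8_t)s[i] << (8 * i);
  }
  std::printf("index of 'e' = %d\n", first_match_index(word, 'e'));  // 4
  std::printf("index of 'z' = %d\n", first_match_index(word, 'z'));  // -1
  return 0;
}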
call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++ bind(L_end); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions) { -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ bool check_exceptions) { -+ -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); +} + -+void MacroAssembler::call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ Register arg_2, -+ Register arg_3, -+ bool check_exceptions) { -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ pass_arg1(this, arg_1); -+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); +} + -+// these are no-ops overridden by InterpreterMacroAssembler -+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} -+void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} + -+// Calls to C land -+// -+// When entering C land, the fp, & esp of the last Java frame have to be recorded -+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -+// has to be reset to 0. This is required to allow proper stack traversal. -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Register last_java_pc, -+ Register tmp) { ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ if (last_java_pc->is_valid()) { -+ sd(last_java_pc, Address(xthread, -+ JavaThread::frame_anchor_offset() + -+ JavaFrameAnchor::last_Java_pc_offset())); -+ } ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+ // determine last_java_sp register -+ if (last_java_sp == sp) { -+ mv(tmp, sp); -+ last_java_sp = tmp; -+ } else if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+ // last_java_fp is optional -+ if (last_java_fp->is_valid()) { -+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++ bind(L_first_loop_exit); +} + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ address last_java_pc, -+ Register tmp) { -+ assert(last_java_pc != NULL, "must provide a valid PC"); ++/** ++ * Multiply 64 bit by 64 bit first loop. 
++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + -+ la(tmp, last_java_pc); -+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); -+} ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+void MacroAssembler::set_last_Java_frame(Register last_java_sp, -+ Register last_java_fp, -+ Label &L, -+ Register tmp) { -+ if (L.is_bound()) { -+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); -+ } else { -+ InstructionMark im(this); -+ L.add_patch_at(code(), locator()); -+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); -+ } -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+void MacroAssembler::reset_last_Java_frame(bool clear_fp) { -+ // we must set sp to zero to clear frame -+ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ // must clear fp, so that compiled frames are not confused; it is -+ // possible that we need it only for debugging -+ if (clear_fp) { -+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); -+ } ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ // Always clear the pc because it could have been set by make_walkable() -+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); -+} ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+void MacroAssembler::call_VM_base(Register oop_result, -+ Register java_thread, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments, -+ bool check_exceptions) { -+ // determine java_thread register -+ if (!java_thread->is_valid()) { -+ java_thread = xthread; -+ } -+ // determine last_java_sp register -+ if (!last_java_sp->is_valid()) { -+ last_java_sp = esp; -+ } ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+ // debugging support -+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); -+ assert(java_thread == xthread, "unexpected register"); ++ j(L_first_loop); + -+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); -+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ // push java thread (becomes first argument of C function) -+ mv(c_rarg0, java_thread); ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); + -+ // set last Java frame before call -+ assert(last_java_sp != fp, 
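The pseudo-code in the comment above ("product = y[idx] * x[xstart] + carry; store the low word; carry = product >>> 64") is what the mul/mulhu pair plus the cad/adc carry helpers compute. A standalone cross-check, assuming a compiler with GCC/Clang's unsigned __int128; note the sketch walks least-significant word first, while the patch iterates Java's big-endian int order and byte-swaps, but the per-column arithmetic is identical:

#include <cassert>
#include <cstdint>
#include <cstdio>

// cad: dst = src1 + src2 with carry-out; "dst < src2" after the add is exactly
// what sltu(carry, dst, src2) computes, and it is 1 iff the add wrapped.
static void cad(uint64_t& dst, uint64_t src1, uint64_t src2, uint64_t& carry) {
  dst = src1 + src2;
  carry = dst < src2 ? 1 : 0;
}

// One column of the schoolbook multiply, matching the pseudo-code above:
// product = y_idx * x_xstart + carry; the low word is stored, the high carries.
static uint64_t column_step(uint64_t y_idx, uint64_t x_xstart, uint64_t& carry) {
  unsigned __int128 product = (unsigned __int128)y_idx * x_xstart + carry;
  carry = (uint64_t)(product >> 64);   // what mulhu plus carry propagation produce
  return (uint64_t)product;            // what mul produces, stored into z
}

int main() {
  // cad sanity check: UINT64_MAX + 5 wraps to 4 with carry 1.
  uint64_t lo, carry;
  cad(lo, ~uint64_t(0), 5, carry);
  assert(lo == 4 && carry == 1);

  // Multiply a two-word y by a one-word x, least-significant word first.
  uint64_t y[2] = { 0xffffffffffffffffULL, 0x00000000deadbeefULL };
  uint64_t x = 0xfedcba9876543210ULL;
  uint64_t z[3];
  carry = 0;
  for (int i = 0; i < 2; i++) { z[i] = column_step(y[i], x, carry); }
  z[2] = carry;

  // Cross-check against direct 128-bit arithmetic.
  unsigned __int128 p0 = (unsigned __int128)y[0] * x;
  unsigned __int128 p1 = (unsigned __int128)y[1] * x + (uint64_t)(p0 >> 64);
  assert(z[0] == (uint64_t)p0 && z[1] == (uint64_t)p1 && z[2] == (uint64_t)(p1 >> 64));
  std::printf("z = %016llx %016llx %016llx\n", (unsigned long long)z[2],
              (unsigned long long)z[1], (unsigned long long)z[0]);
  return 0;
}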
"can't use fp"); ++ bind(L_first_loop_exit); ++} + -+ Label l; -+ set_last_Java_frame(last_java_sp, fp, l, t0); ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+ // do the call, remove parameters -+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ // reset last Java frame -+ // Only interpreter should have to clear fp -+ reset_last_Java_frame(true); ++ srliw(jdx, idx, 2); + -+ // C++ interp handles this in the interpreter -+ check_and_handle_popframe(java_thread); -+ check_and_handle_earlyret(java_thread); ++ bind(L_third_loop); + -+ if (check_exceptions) { -+ // check for pending exceptions (java_thread is set upon return) -+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); -+ Label ok; -+ beqz(t0, ok); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); -+ jalr(x0, t0, offset); -+ bind(ok); -+ } ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ // get oop result if there is one and reset the value in the thread -+ if (oop_result->is_valid()) { -+ get_vm_result(oop_result, java_thread); -+ } -+} ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { -+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); -+ verify_oop(oop_result, "broken oop in call_VM_base"); -+} ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { -+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); -+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); -+} ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -+ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -+ assert_different_registers(klass, xthread, tmp); ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ Label L_fallthrough, L_tmp; -+ if (L_fast_path == NULL) { -+ L_fast_path = &L_fallthrough; -+ } else if (L_slow_path == NULL) { -+ L_slow_path = &L_fallthrough; -+ } ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // Fast path check: class is fully initialized -+ 
lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -+ sub(tmp, tmp, InstanceKlass::fully_initialized); -+ beqz(tmp, *L_fast_path); ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+ // Fast path check: current thread is initializer thread -+ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+ if (L_slow_path == &L_fallthrough) { -+ beq(xthread, tmp, *L_fast_path); -+ bind(*L_slow_path); -+ } else if (L_fast_path == &L_fallthrough) { -+ bne(xthread, tmp, *L_slow_path); -+ bind(*L_fast_path); -+ } else { -+ Unimplemented(); -+ } -+} ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+void MacroAssembler::verify_oop(Register reg, const char* s) { -+ if (!VerifyOops) { return; } ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ // Pass register number to verify_oop_subroutine -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop: %s: %s", reg->name(), s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop {"); ++ j(L_third_loop); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ bind(L_third_loop_exit); + -+ mv(c_rarg0, reg); // c_rarg0 : x10 -+ li(t0, (uintptr_t)(address)b); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ BLOCK_COMMENT("} verify_oop"); -+} ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+void MacroAssembler::verify_oop_addr(Address addr, const char* s) { -+ if (!VerifyOops) { -+ return; -+ } ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ const char* b = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("verify_oop_addr: %s", s); -+ b = code_string(ss.as_string()); -+ } -+ BLOCK_COMMENT("verify_oop_addr {"); ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + -+ if (addr.uses(sp)) { -+ la(x10, addr); -+ ld(x10, Address(x10, 4 * wordSize)); -+ } else { -+ ld(x10, addr); -+ } ++ bind(L_check_1); + -+ li(t0, (uintptr_t)(address)b); ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+ // call indirectly to solve generation ordering problem -+ int32_t offset = 0; -+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); -+ ld(t1, Address(t1, offset)); -+ jalr(t1); ++ 
shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ BLOCK_COMMENT("} verify_oop_addr"); -+} ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); + -+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, -+ int extra_slot_offset) { -+ // cf. TemplateTable::prepare_invoke(), if (load_receiver). -+ int stackElementSize = Interpreter::stackElementSize; -+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -+#ifdef ASSERT -+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); -+ assert(offset1 - offset == stackElementSize, "correct arithmetic"); -+#endif -+ if (arg_slot.is_constant()) { -+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); -+ } else { -+ assert_different_registers(t0, arg_slot.as_register()); -+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); -+ return Address(t0, offset); -+ } -+} ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+#ifndef PRODUCT -+extern "C" void findpc(intptr_t x); -+#endif ++ bind(L_post_third_loop_done); ++} + -+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) +{ -+ // In order to get locks to work, we need to fake a in_VM state -+ if (ShowMessageBoxOnError) { -+ JavaThread* thread = JavaThread::current(); -+ JavaThreadState saved_state = thread->thread_state(); -+ thread->set_thread_state(_thread_in_vm); -+#ifndef PRODUCT -+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { -+ ttyLocker ttyl; -+ BytecodeCounter::print(); -+ } -+#endif -+ if (os::message_box(msg, "Execution stopped, print registers?")) { -+ ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016lx", pc); -+#ifndef PRODUCT -+ tty->cr(); -+ findpc(pc); -+ tty->cr(); -+#endif -+ tty->print_cr(" x0 = 0x%016lx", regs[0]); -+ tty->print_cr(" x1 = 0x%016lx", regs[1]); -+ tty->print_cr(" x2 = 0x%016lx", regs[2]); -+ tty->print_cr(" x3 = 0x%016lx", regs[3]); -+ tty->print_cr(" x4 = 0x%016lx", regs[4]); -+ tty->print_cr(" x5 = 0x%016lx", regs[5]); -+ tty->print_cr(" x6 = 0x%016lx", regs[6]); -+ tty->print_cr(" x7 = 0x%016lx", regs[7]); -+ tty->print_cr(" x8 = 0x%016lx", regs[8]); -+ tty->print_cr(" x9 = 0x%016lx", regs[9]); -+ tty->print_cr("x10 = 0x%016lx", regs[10]); -+ tty->print_cr("x11 = 0x%016lx", regs[11]); -+ tty->print_cr("x12 = 0x%016lx", regs[12]); -+ tty->print_cr("x13 = 0x%016lx", regs[13]); -+ tty->print_cr("x14 = 0x%016lx", regs[14]); -+ tty->print_cr("x15 = 0x%016lx", regs[15]); -+ tty->print_cr("x16 = 0x%016lx", regs[16]); -+ tty->print_cr("x17 = 0x%016lx", regs[17]); -+ tty->print_cr("x18 = 0x%016lx", regs[18]); -+ tty->print_cr("x19 = 0x%016lx", regs[19]); -+ tty->print_cr("x20 = 0x%016lx", regs[20]); -+ tty->print_cr("x21 = 0x%016lx", regs[21]); -+ tty->print_cr("x22 = 0x%016lx", regs[22]); -+ tty->print_cr("x23 = 0x%016lx", regs[23]); -+ tty->print_cr("x24 = 0x%016lx", regs[24]); -+ 
tty->print_cr("x25 = 0x%016lx", regs[25]); -+ tty->print_cr("x26 = 0x%016lx", regs[26]); -+ tty->print_cr("x27 = 0x%016lx", regs[27]); -+ tty->print_cr("x28 = 0x%016lx", regs[28]); -+ tty->print_cr("x30 = 0x%016lx", regs[30]); -+ tty->print_cr("x31 = 0x%016lx", regs[31]); -+ BREAKPOINT; -+ } -+ } -+ fatal("DEBUG MESSAGE: %s", msg); -+} ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { -+ Label done, not_weak; -+ beqz(value, done); // Use NULL as-is. ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+ // Test for jweak tag. -+ andi(t0, value, JNIHandles::weak_tag_mask); -+ beqz(t0, not_weak); ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+ // Resolve jweak. -+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, -+ Address(value, -JNIHandles::weak_tag_value), tmp, thread); -+ verify_oop(value); -+ j(done); ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+ bind(not_weak); -+ // Resolve (untagged) jobject. -+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); -+ verify_oop(value); -+ bind(done); -+} ++ Label L_multiply_64_x_64_loop, L_done; + -+void MacroAssembler::stop(const char* msg) { -+ address ip = pc(); -+ pusha(); -+ li(c_rarg0, (uintptr_t)(address)msg); -+ li(c_rarg1, (uintptr_t)(address)ip); -+ mv(c_rarg2, sp); -+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); -+ jalr(c_rarg3); -+ ebreak(); -+} ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+void MacroAssembler::unimplemented(const char* what) { -+ const char* buf = NULL; -+ { -+ ResourceMark rm; -+ stringStream ss; -+ ss.print("unimplemented: %s", what); -+ buf = code_string(ss.as_string()); -+ } -+ stop(buf); -+} ++ const Register jdx = tmp1; + -+void MacroAssembler::emit_static_call_stub() { -+ // CompiledDirectStaticCall::set_to_interpreted knows the -+ // exact layout of this stub. ++ if (AvoidUnalignedAccesses) { ++ // Check if x and y are both 8-byte aligned. ++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); + -+ ifence(); -+ mov_metadata(xmethod, (Metadata*)NULL); ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // Jump to the entry point of the i2c stub. 
-+ int32_t offset = 0; -+ movptr_with_offset(t0, 0, offset); -+ jalr(x0, t0, offset); -+} ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; + -+void MacroAssembler::call_VM_leaf_base(address entry_point, -+ int number_of_arguments, -+ Label *retaddr) { -+ call_native_base(entry_point, retaddr); -+} ++ blez(jdx, L_third_loop_exit); + -+void MacroAssembler::call_native(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_native_base(entry_point); -+} ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); + -+void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { -+ Label E, L; -+ int32_t offset = 0; -+ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp -+ movptr_with_offset(t0, entry_point, offset); -+ jalr(x1, t0, offset); -+ if (retaddr != NULL) { -+ bind(*retaddr); ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ j(L_second_loop_unaligned); + } -+ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp -+} + -+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { -+ call_VM_leaf_base(entry_point, number_of_arguments); -+} ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ call_VM_leaf_base(entry_point, 1); -+} ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ call_VM_leaf_base(entry_point, 2); -+} ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, -+ Register arg_1, Register arg_2) { -+ pass_arg0(this, arg_0); -+ pass_arg1(this, arg_1); -+ pass_arg2(this, arg_2); -+ call_VM_leaf_base(entry_point, 3); -+} ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 1); -+} ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ // Second and third (nested) loops. 
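++ // (The aligned path below unrolls the inner loop two 64-bit limbs of y at a
++ // time via multiply_128_x_128_loop; the Java-level loops it implements are
++ // sketched here, schematically:)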
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 2); -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 3); -+} ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { -+ assert(arg_0 != c_rarg3, "smashed arg"); -+ assert(arg_1 != c_rarg3, "smashed arg"); -+ assert(arg_2 != c_rarg3, "smashed arg"); -+ pass_arg3(this, arg_3); -+ assert(arg_0 != c_rarg2, "smashed arg"); -+ assert(arg_1 != c_rarg2, "smashed arg"); -+ pass_arg2(this, arg_2); -+ assert(arg_0 != c_rarg1, "smashed arg"); -+ pass_arg1(this, arg_1); -+ pass_arg0(this, arg_0); -+ MacroAssembler::call_VM_leaf_base(entry_point, 4); -+} ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+void MacroAssembler::nop() { -+ addi(x0, x0, 0); -+} ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+void MacroAssembler::mv(Register Rd, Register Rs) { -+ if (Rd != Rs) { -+ addi(Rd, Rs, 0); -+ } -+} -+ -+void MacroAssembler::notr(Register Rd, Register Rs) { -+ xori(Rd, Rs, -1); -+} -+ -+void MacroAssembler::neg(Register Rd, Register Rs) { -+ sub(Rd, x0, Rs); -+} ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+void MacroAssembler::negw(Register Rd, Register Rs) { -+ subw(Rd, x0, Rs); -+} ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+void MacroAssembler::sext_w(Register Rd, Register Rs) { -+ addiw(Rd, Rs, 0); -+} ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+void MacroAssembler::zext_b(Register Rd, Register Rs) { -+ andi(Rd, Rs, 0xFF); -+} ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+void MacroAssembler::seqz(Register Rd, Register Rs) { -+ sltiu(Rd, Rs, 1); -+} ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); + -+void MacroAssembler::snez(Register Rd, Register Rs) { -+ sltu(Rd, x0, Rs); -+} ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 
0)); ++ j(L_second_loop_aligned); + -+void MacroAssembler::sltz(Register Rd, Register Rs) { -+ slt(Rd, Rs, x0); -+} ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); + -+void MacroAssembler::sgtz(Register Rd, Register Rs) { -+ slt(Rd, x0, Rs); ++ bind(L_done); +} ++#endif + -+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_s(Rd, Rs, Rs); ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } -+} -+ -+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_s(Rd, Rs, Rs); -+} ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 8 : 16; ++ mv(Rd, -step); ++ mv(tmp2, Rs); + -+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_s(Rd, Rs, Rs); ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); +} + -+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { -+ if (Rd != Rs) { -+ fsgnj_d(Rd, Rs, Rs); ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ mv(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } +} + -+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjx_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { -+ fsgnjn_d(Rd, Rs, Rs); -+} -+ -+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { -+ vmnand_mm(vd, vs, vs); -+} -+ -+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { -+ vnsrl_wx(vd, vs, x0, vm); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ mv(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } +} + -+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { -+ vfsgnjn_vv(vd, vs, vs); -+} ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). 
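++// A sketch of the division of labour (zero_blocks itself is a stub generated
++// elsewhere and is not shown in this file):
++//   if (cnt >= zero_words_block_size)
++//     call zero_blocks;   // bulk work, whole blocks at a time
++//   // whatever remains (< zero_words_block_size words) is stored inline
++//   // below, driven by the low bits of cnt, one sd(zr) per word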
++const int MacroAssembler::zero_words_block_size = 8; + -+void MacroAssembler::la(Register Rd, const address &dest) { -+ int64_t offset = dest - pc(); -+ if (is_offset_in_range(offset, 32)) { -+ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit -+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); -+ } else { -+ movptr(Rd, dest); -+ } -+} ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+void MacroAssembler::la(Register Rd, const Address &adr) { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), adr.rspec()); -+ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. ++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+ switch (adr.getMode()) { -+ case Address::literal: { -+ if (rtype == relocInfo::none) { -+ li(Rd, (intptr_t)(adr.target())); -+ } else { -+ movptr(Rd, adr.target()); ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; + } -+ break; ++ } else { ++ jal(zero_blocks); + } -+ case Address::base_plus_offset: { -+ int32_t offset = 0; -+ baseOffset(Rd, adr, offset); -+ addi(Rd, Rd, offset); -+ break; ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); + } -+ default: -+ ShouldNotReachHere(); ++ bind(l); + } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); +} + -+void MacroAssembler::la(Register Rd, Label &label) { -+ la(Rd, target(label)); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ -+ NAME(Rs, zr, dest); \ -+ } \ -+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ -+ NAME(Rs, zr, l, is_far); \ -+ } \ ++#define SmallArraySize (18 * BytesPerLong) + -+ INSN(beq); -+ INSN(bne); -+ INSN(blt); -+ INSN(ble); -+ INSN(bge); -+ INSN(bgt); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
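++//
++// Since cnt is a compile-time constant, the expansion is chosen statically
++// (sketch of the logic below):
++//   if (cnt <= SmallArraySize / BytesPerLong)
++//     emit cnt individual sd(zr, ...) stores            // fully unrolled
++//   else
++//     emit (cnt % 8) peeled stores, then an 8-way unrolled counted loop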
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+#undef INSN ++ BLOCK_COMMENT("zero_words {"); + -+// Float compare branch instructions ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+#define INSN(NAME, FLOATCMP, BRANCH) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_s(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ -+ FLOATCMP##_d(t0, Rs1, Rs2); \ -+ BRANCH(t0, l, is_far); \ ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ mv(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); + } + -+ INSN(beq, feq, bnez); -+ INSN(bne, feq, beqz); ++ BLOCK_COMMENT("} zero_words"); ++} + -+#undef INSN ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + ++ assert_different_registers(base, cnt, value, t0, t1); + -+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_s(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_s(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ if (is_unordered) { \ -+ /* jump if either source is NaN or condition is expected */ \ -+ FLOATCMP2##_d(t0, Rs2, Rs1); \ -+ beqz(t0, l, is_far); \ -+ } else { \ -+ /* jump if no NaN in source and condition is expected */ \ -+ FLOATCMP1##_d(t0, Rs1, Rs2); \ -+ bnez(t0, l, is_far); \ -+ } \ -+ } ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+ INSN(ble, fle, flt); -+ INSN(blt, flt, fle); ++ beqz(cnt, fini); + -+#undef INSN ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
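++ // The computed jump below lands (cnt % 8) sd instructions before 'entry', so
++ // exactly that many stores execute before the 8-way unrolled loop is entered;
++ // each sd encoding is 4 bytes, hence the slli(t0, t0, 2). base is first
++ // advanced by (cnt % 8) words so the negative-offset stores in the unrolled
++ // body write exactly the peeled words.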
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+#define INSN(NAME, CMP) \ -+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ -+ } \ -+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ -+ bool is_far, bool is_unordered) { \ -+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); + } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); + -+ INSN(bgt, blt); -+ INSN(bge, ble); -+ -+#undef INSN -+ ++ bind(fini); ++} + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd) { \ -+ csrr(Rd, CSR); \ -+ } ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} + -+ INSN(rdinstret, CSR_INSTERT); -+ INSN(rdcycle, CSR_CYCLE); -+ INSN(rdtime, CSR_TIME); -+ INSN(frcsr, CSR_FCSR); -+ INSN(frrm, CSR_FRM); -+ INSN(frflags, CSR_FFLAGS); ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef INSN ++#undef FCVT_SAFE + -+void MacroAssembler::csrr(Register Rd, unsigned csr) { -+ csrrs(Rd, csr, x0); ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. 
*/ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ +} + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ -+ OPFUN(x0, csr, Rs); \ -+ } ++FCMP(float, s); ++FCMP(double, d); + -+ INSN(csrw, csrrw); -+ INSN(csrs, csrrs); -+ INSN(csrc, csrrc); ++#undef FCMP + -+#undef INSN ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+#define INSN(NAME, OPFUN) \ -+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ -+ OPFUN(x0, csr, imm); \ ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); + } ++#endif // ASSERT + -+ INSN(csrwi, csrrwi); -+ INSN(csrsi, csrrsi); -+ INSN(csrci, csrrci); -+ -+#undef INSN ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+#define INSN(NAME, CSR) \ -+ void MacroAssembler::NAME(Register Rd, Register Rs) { \ -+ csrrw(Rd, CSR, Rs); \ -+ } ++ Label loop; ++ Label entry; + -+ INSN(fscsr, CSR_FCSR); -+ INSN(fsrm, CSR_FRM); -+ INSN(fsflags, CSR_FFLAGS); ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... ++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+#undef INSN ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(Register Rs) { \ -+ NAME(x0, Rs); \ ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); + } -+ -+ INSN(fscsr); -+ INSN(fsrm); -+ INSN(fsflags); -+ -+#undef INSN -+ -+void MacroAssembler::fsrmi(Register Rd, unsigned imm) { -+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register"); -+ csrrwi(Rd, CSR_FRM, imm); -+} -+ -+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { -+ csrrwi(Rd, CSR_FFLAGS, imm); -+} -+ -+#define INSN(NAME) \ -+ void MacroAssembler::NAME(unsigned imm) { \ -+ NAME(x0, imm); \ -+ } -+ -+ INSN(fsrmi); -+ INSN(fsflagsi); -+ -+#undef INSN -+ -+void MacroAssembler::push_reg(Register Rs) -+{ -+ addi(esp, esp, 0 - wordSize); -+ sd(Rs, Address(esp, 0)); -+} -+ -+void MacroAssembler::pop_reg(Register Rd) -+{ -+ ld(Rd, esp, 0); -+ addi(esp, esp, wordSize); ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); +} + -+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { -+ int count = 0; -+ // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void 
MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; + } -+ bitset <<= 1; + } -+ return count; -+} -+ -+// Push lots of registers in the bit set supplied. Don't push sp. -+// Return the number of words pushed -+int MacroAssembler::push_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_pushed = 0;) -+ CompressibleRegion cr(this); -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; + -+ if (count) { -+ addi(stack, stack, - count * wordSize - offset); -+ } -+ for (int i = count - 1; i >= 0; i--) { -+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_pushed ++;) ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); + } -+ -+ assert(words_pushed == count, "oops, pushed != count"); -+ -+ return count; +} + -+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { -+ DEBUG_ONLY(int words_popped = 0;) -+ CompressibleRegion cr(this); -+ -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ // reserve one slot to align for odd count -+ int offset = is_even(count) ? 0 : wordSize; -+ -+ for (int i = count - 1; i >= 0; i--) { -+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); -+ DEBUG_ONLY(words_popped ++;) ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; + } + -+ if (count) { -+ addi(stack, stack, count * wordSize + offset); ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; + } -+ assert(words_popped == count, "oops, popped != count"); + -+ return count; ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } +} + -+// Push float registers in the bitset, except sp. -+// Return the number of heapwords pushed. 
-+int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_pushed = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int push_slots = count + (count & 1); -+ -+ if (count) { -+ addi(stack, stack, -push_slots * wordSize); ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; ++ } + } + -+ for (int i = count - 1; i >= 0; i--) { -+ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); -+ words_pushed++; ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); + } -+ -+ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); -+ return count; +} + -+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int words_popped = 0; -+ unsigned char regs[32]; -+ int count = bitset_to_regs(bitset, regs); -+ int pop_slots = count + (count & 1); -+ -+ for (int i = count - 1; i >= 0; i--) { -+ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); -+ words_popped++; ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; + } -+ -+ if (count) { -+ addi(stack, stack, pop_slots * wordSize); ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; + } + -+ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); -+ return count; ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + +#ifdef COMPILER2 -+int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+ for (int i = 0; i < count; i++) { -+ sub(stack, stack, vector_size_in_bytes); -+ vs1r_v(as_VectorRegister(regs[i]), stack); -+ } ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+ return count * vector_size_in_bytes / wordSize; -+} ++ mv(result, -1); ++ mv(index, zr); + -+int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -+ CompressibleRegion cr(this); -+ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+ // Scan bitset to accumulate register pairs -+ unsigned char regs[32]; -+ int count = 0; -+ for (int reg = 31; reg >= 0; 
reg--) { -+ if ((1U << 31) & bitset) { -+ regs[count++] = reg; -+ } -+ bitset <<= 1; -+ } ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+ for (int i = count - 1; i >= 0; i--) { -+ vl1r_v(as_VectorRegister(regs[i]), stack); -+ add(stack, stack, vector_size_in_bytes); -+ } ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+ return count * vector_size_in_bytes / wordSize; -+} -+#endif // COMPILER2 ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ // Push integer registers x7, x10-x17, x28-x31. -+ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ // Push float registers f0-f7, f10-f17, f28-f31. -+ addi(sp, sp, - wordSize * 20); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } -+} ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { -+ CompressibleRegion cr(this); -+ int offset = 0; -+ for (int i = 0; i < 32; i++) { -+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { -+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); -+ } -+ } -+ addi(sp, sp, wordSize * 20); ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); -+} ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). -+void MacroAssembler::pusha() { -+ CompressibleRegion cr(this); -+ push_reg(0xffffffe2, sp); -+} ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
-+void MacroAssembler::popa() { -+ CompressibleRegion cr(this); -+ pop_reg(0xffffffe2, sp); ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(0xffffffe0, sp); ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+ // float registers -+ addi(sp, sp, - 32 * wordSize); -+ for (int i = 0; i < 32; i++) { -+ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); -+ } ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); + -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } -+} ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ CompressibleRegion cr(this); -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); + } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); + } + -+ // float registers -+ for (int i = 0; i < 32; i++) { -+ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); + } -+ addi(sp, sp, 32 * wordSize); -+ -+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(0xffffffe0, sp); -+} ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+static int patch_offset_in_jal(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] -+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] -+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] -+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // 
offset[19:12] ==> branch[19:12] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+static int patch_offset_in_conditional_branch(address branch, int64_t offset) { -+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); -+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] -+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] -+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] -+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] -+ return NativeInstruction::instruction_size; // only one instruction -+} ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+static int patch_offset_in_pc_relative(address branch, int64_t offset) { -+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load -+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] -+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; -+} ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+static int patch_addr_in_movptr(address branch, address target) { -+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 36) >> 36; -+ int64_t upper = ((intptr_t)target - lower) >> 28; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } + -+static int patch_imm_in_li64(address branch, address target) { -+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi -+ int64_t lower = (intptr_t)target & 0xffffffff; -+ lower = lower - ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ int64_t tmp_upper = upper, tmp_lower = upper; -+ tmp_lower = (tmp_lower << 52) >> 52; -+ tmp_upper -= tmp_lower; -+ tmp_upper >>= 12; -+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), -+ // upper = target[63:32] + 1. 
-+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. -+ // Load the rest 32 bits. -+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. -+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. -+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. -+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + -+static int patch_imm_in_li32(address branch, int32_t target) { -+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw -+ int64_t upper = (intptr_t)target; -+ int32_t lower = (((int32_t)target) << 20) >> 20; -+ upper -= lower; -+ upper = (int32_t)upper; -+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. -+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. -+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; -+} ++ bind(NOMATCH); ++ mv(result, -1); + -+static long get_offset_of_jal(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ long offset = 0; -+ unsigned insn = *(unsigned*)insn_addr; -+ long val = (long)Assembler::sextract(insn, 31, 12); -+ offset |= ((val >> 19) & 0x1) << 20; -+ offset |= (val & 0xff) << 12; -+ offset |= ((val >> 8) & 0x1) << 11; -+ offset |= ((val >> 9) & 0x3ff) << 1; -+ offset = (offset << 43) >> 43; -+ return offset; ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+static long get_offset_of_conditional_branch(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ unsigned insn = *(unsigned*)insn_addr; -+ offset = (long)Assembler::sextract(insn, 31, 31); -+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); -+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); -+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); -+ offset = (offset << 41) >> 41; -+ return offset; -+} ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+static long get_offset_of_pc_relative(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. -+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. -+ offset = (offset << 32) >> 32; -+ return offset; -+} ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+static address get_target_of_movptr(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. 
-+ return (address) target_address; -+} ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+static address get_target_of_li64(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. -+ return (address)target_address; -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+static address get_target_of_li32(address insn_addr) { -+ assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. -+ return (address)target_address; -+} ++ bool isLL = ae == StrIntrinsicNode::LL; + -+// Patch any kind of instruction; there may be several instructions. -+// Return the total length (in bytes) of the instructions. -+int MacroAssembler::pd_patch_instruction_size(address branch, address target) { -+ assert_cond(branch != NULL); -+ int64_t offset = target - branch; -+ if (NativeInstruction::is_jal_at(branch)) { // jal -+ return patch_offset_in_jal(branch, offset); -+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne -+ return patch_offset_in_conditional_branch(branch, offset); -+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load -+ return patch_offset_in_pc_relative(branch, offset); -+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr -+ return patch_addr_in_movptr(branch, target); -+ } else if (NativeInstruction::is_li64_at(branch)) { // li64 -+ return patch_imm_in_li64(branch, target); -+ } else if (NativeInstruction::is_li32_at(branch)) { // li32 -+ int64_t imm = (intptr_t)target; -+ return patch_imm_in_li32(branch, (int32_t)imm); -+ } else { -+#ifdef ASSERT -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", -+ *(unsigned*)branch, p2i(branch)); -+ Disassembler::decode(branch - 16, branch + 16); -+#endif -+ ShouldNotReachHere(); -+ return -1; -+ } -+} -+ -+address MacroAssembler::target_addr_for_insn(address insn_addr) { -+ long offset = 0; -+ assert_cond(insn_addr != NULL); -+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal -+ offset = get_offset_of_jal(insn_addr); -+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne -+ offset = get_offset_of_conditional_branch(insn_addr); -+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load -+ offset = get_offset_of_pc_relative(insn_addr); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr -+ return get_target_of_movptr(insn_addr); -+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 -+ return get_target_of_li64(insn_addr); -+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 -+ return get_target_of_li32(insn_addr); -+ } else { -+ ShouldNotReachHere(); -+ } -+ return 
address(((uintptr_t)insn_addr + offset)); -+} -+ -+int MacroAssembler::patch_oop(address insn_addr, address o) { -+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode -+ // narrow OOPs by setting the upper 16 bits in the first -+ // instruction. -+ if (NativeInstruction::is_li32_at(insn_addr)) { -+ // Move narrow OOP -+ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ return patch_imm_in_li32(insn_addr, (int32_t)n); -+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { -+ // Move wide OOP -+ return patch_addr_in_movptr(insn_addr, o); -+ } -+ ShouldNotReachHere(); -+ return -1; -+} -+ -+void MacroAssembler::reinit_heapbase() { -+ if (UseCompressedOops) { -+ if (Universe::is_fully_initialized()) { -+ mv(xheapbase, CompressedOops::ptrs_base()); -+ } else { -+ int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ ld(xheapbase, Address(xheapbase, offset)); -+ } -+ } -+} ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+void MacroAssembler::mv(Register Rd, Address dest) { -+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); -+ code_section()->relocate(pc(), dest.rspec()); -+ movptr(Rd, dest.target()); -+} ++ BLOCK_COMMENT("string_indexof {"); + -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruciton -+ // movptr instead of li -+ movptr(Rd, addr); -+} ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { -+ if (src.is_register()) { -+ mv(Rd, src.as_register()); -+ } else { -+ mv(Rd, src.as_constant()); -+ } -+} ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { -+ andr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. + -+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { -+ orr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. 
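++ // Dispatch sketch: result_tmp = haystack_len - needle_len is the last
++ // feasible starting position; a linear scan is preferred whenever the
++ // Boyer-Moore-Horspool setup is unlikely to pay off:
++ //   needle_len <  8                  -> LINEARSEARCH (inline linear scan)
++ //   needle_len >= 256                -> LINEARSTUB (linear scan, out of line)
++ //   needle_len >= haystack_len / 4   -> LINEARSTUB
++ //   otherwise                        -> Boyer-Moore-Horspool below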
++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { -+ xorr(Rd, Rs1, Rs2); -+ // addw: The result is clipped to 32 bits, then the sign bit is extended, -+ // and the result is stored in Rd -+ addw(Rd, Rd, zr); -+} ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+// Note: load_unsigned_short used to be called load_unsigned_word. -+int MacroAssembler::load_unsigned_short(Register dst, Address src) { -+ int off = offset(); -+ lhu(dst, src); -+ return off; -+} ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+int MacroAssembler::load_unsigned_byte(Register dst, Address src) { -+ int off = offset(); -+ lbu(dst, src); -+ return off; -+} ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + -+int MacroAssembler::load_signed_short(Register dst, Address src) { -+ int off = offset(); -+ lh(dst, src); -+ return off; -+} ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 
7 : 3; + -+int MacroAssembler::load_signed_byte(Register dst, Address src) { -+ int off = offset(); -+ lb(dst, src); -+ return off; -+} ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { -+ switch (size_in_bytes) { -+ case 8: ld(dst, src); break; -+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; -+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; -+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ sub(sp, sp, ASIZE); + -+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { -+ switch (size_in_bytes) { -+ case 8: sd(src, dst); break; -+ case 4: sw(src, dst); break; -+ case 2: sh(src, dst); break; -+ case 1: sb(src, dst); break; -+ default: ShouldNotReachHere(); -+ } -+} ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+// reverse bytes in halfword in lower 16 bits and sign-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) -+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ slli(Rd, Rs, 56); -+ srai(Rd, Rd, 48); // sign-extend -+ orr(Rd, Rd, tmp); -+} ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + -+// reverse bytes in lower word and sign-extend -+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) -+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srai(Rd, Rd, 32); -+ return; ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_w_u(Rd, Rs, tmp1, tmp2); -+ slli(tmp2, Rd, 48); -+ srai(tmp2, tmp2, 32); // sign-extend -+ srli(Rd, Rd, 16); -+ orr(Rd, Rd, tmp2); -+} ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+// reverse bytes in halfword in lower 16 bits and zero-extend -+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ srli(Rd, Rd, 48); -+ return; -+ } -+ assert_different_registers(Rs, tmp); -+ assert_different_registers(Rd, tmp); -+ srli(tmp, Rs, 8); -+ andi(tmp, tmp, 0xFF); -+ andi(Rd, Rs, 0xFF); -+ slli(Rd, Rd, 8); -+ orr(Rd, Rd, tmp); -+} ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); + -+// reverse bytes in halfwords in lower 32 bits and zero-extend -+// 
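++  // At this point every slot of the 256-byte bc[] table on the stack holds the
++  // default shift needle_len, stored eight bytes at a time from the replicated
++  // value in tmp5 (e.g. needle_len = 5 fills each word with 0x0505050505050505).
++  // The loop below overwrites bc[c] for every character c that occurs in the
++  // pattern with its distance from the last pattern character.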
Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) -+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ return; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ srli(tmp2, Rs, 16); -+ revb_h_h_u(tmp2, tmp2, tmp1); -+ revb_h_h_u(Rd, Rs, tmp1); -+ slli(tmp2, tmp2, 16); -+ orr(Rd, Rd, tmp2); -+} ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+// This method is only used for revb_h -+// Rd = Rs[47:0] Rs[55:48] Rs[63:56] -+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1); -+ srli(tmp1, Rs, 48); -+ andi(tmp2, tmp1, 0xFF); -+ slli(tmp2, tmp2, 8); -+ srli(tmp1, tmp1, 8); -+ orr(tmp1, tmp1, tmp2); -+ slli(Rd, Rs, 16); -+ orr(Rd, Rd, tmp1); -+} ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); + -+// reverse bytes in each halfword -+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] -+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ assert_different_registers(Rs, tmp1); -+ assert_different_registers(Rd, tmp1); -+ rev8(Rd, Rs); -+ zext_w(tmp1, Rd); -+ roriw(tmp1, tmp1, 16); -+ slli(tmp1, tmp1, 32); -+ srli(Rd, Rd, 32); -+ roriw(Rd, Rd, 16); -+ zext_w(Rd, Rd); -+ orr(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb_h_helper(Rd, Rs, tmp1, tmp2); -+ for (int i = 0; i < 3; ++i) { -+ revb_h_helper(Rd, Rd, tmp1, tmp2); ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d + } -+} + -+// reverse bytes in each word -+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] -+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ rori(Rd, Rd, 32); -+ return; ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ revb(Rd, Rs, tmp1, tmp2); -+ ror_imm(Rd, Rd, 32); -+} -+ -+// reverse bytes in doubleword -+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] -+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseRVB) { -+ rev8(Rd, Rs); -+ return; ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes + } -+ assert_different_registers(Rs, tmp1, tmp2); -+ assert_different_registers(Rd, tmp1, tmp2); -+ andi(tmp1, Rs, 0xFF); -+ slli(tmp1, tmp1, 8); -+ for (int step = 8; step < 56; step += 8) { -+ srli(tmp2, Rs, step); -+ andi(tmp2, tmp2, 0xFF); -+ orr(tmp1, tmp1, tmp2); -+ slli(tmp1, tmp1, 8); ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); + } -+ srli(Rd, Rs, 56); -+ andi(Rd, Rd, 0xFF); -+ orr(Rd, tmp1, Rd); -+} + -+// rotate right with shift bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseRVB) { -+ rori(dst, src, shift); -+ return; -+ } ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + -+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { -+ if (is_imm_in_range(imm, 12, 0)) { -+ and_imm12(Rd, Rn, imm); -+ } else { -+ assert_different_registers(Rn, tmp); -+ li(tmp, imm); -+ andr(Rd, Rn, tmp); -+ } -+} ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); + -+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { -+ ld(tmp1, adr); -+ if (src.is_register()) { -+ orr(tmp1, tmp1, src.as_register()); -+ } else { -+ if (is_imm_in_range(src.as_constant(), 12, 0)) { -+ ori(tmp1, tmp1, src.as_constant()); ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); + } else { -+ assert_different_registers(tmp1, tmp2); -+ li(tmp2, src.as_constant()); -+ orr(tmp1, tmp1, tmp2); ++ mv(result_tmp, 1); + } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); + } -+ sd(tmp1, adr); -+} ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { -+ if (UseCompressedClassPointers) { -+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (CompressedKlassPointers::base() == NULL) { -+ slli(tmp, tmp, CompressedKlassPointers::shift()); -+ beq(trial_klass, tmp, L); -+ return; -+ } -+ decode_klass_not_null(tmp); -+ } else { -+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ } -+ beq(trial_klass, tmp, L); -+} ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+// Move an oop into a register. immediate is true if we want -+// immediate instructions and nmethod entry barriers are not enabled. -+// i.e. we are not going to patch this instruction while the code is being -+// executed by another thread. -+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_oop_index(obj); -+ } else { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ oop_index = oop_recorder()->find_index(obj); -+ } -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); + -+ // nmethod entry barrier necessitate using the constant pool. 
They have to be -+ // ordered with respected to oop access. -+ // Using immediate literals would necessitate fence.i. -+ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address -+ ld_constant(dst, Address(dummy, rspec)); -+ } else -+ mv(dst, Address((address)obj, rspec)); -+} ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); + -+// Move a metadata address into a register. -+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { -+ int oop_index; -+ if (obj == NULL) { -+ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); + } else { -+ oop_index = oop_recorder()->find_index(obj); ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); + } -+ RelocationHolder rspec = metadata_Relocation::spec(oop_index); -+ mv(dst, Address((address)obj, rspec)); -+} -+ -+// Writes to stack successive pages until offset reached to check for -+// stack overflow + shadow pages. This clobbers tmp. -+void MacroAssembler::bang_stack_size(Register size, Register tmp) { -+ assert_different_registers(tmp, size, t0); -+ // Bang stack for total size given plus shadow page size. -+ // Bang one page at a time because large size can bang beyond yellow and -+ // red zones. -+ mv(t0, os::vm_page_size()); -+ Label loop; -+ bind(loop); -+ sub(tmp, sp, t0); -+ subw(size, size, t0); -+ sd(size, Address(tmp)); -+ bgtz(size, loop); ++ trampoline_call(stub); ++ j(DONE); + -+ // Bang down shadow pages too. -+ // At this point, (tmp-0) is the last address touched, so don't -+ // touch it again. (It was touched as (tmp-pagesize) but then tmp -+ // was post-decremented.) Skip this address by starting at i=1, and -+ // touch a few more pages below. N.B. It is important to touch all -+ // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ // this could be any sized move but this is can be a debugging crumb -+ // so the bigger the better. 
-+ sub(tmp, tmp, os::vm_page_size()); -+ sd(size, Address(tmp, 0)); -+ } -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { -+ assert_cond(masm != NULL); -+ int32_t offset = 0; -+ _masm = masm; -+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); -+ _masm->lbu(t0, Address(t0, offset)); -+ _masm->beqz(t0, _label); -+} ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + -+SkipIfEqual::~SkipIfEqual() { -+ assert_cond(_masm != NULL); -+ _masm->bind(_label); -+ _masm = NULL; ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); +} + -+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ ld(dst, Address(xmethod, Method::const_offset())); -+ ld(dst, Address(dst, ConstMethod::constants_offset())); -+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); -+ ld(dst, Address(dst, mirror_offset)); -+ resolve_oop_handle(dst, tmp); -+} ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { -+ // OopHandle::resolve is an indirection. -+ assert_different_registers(result, tmp); -+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+// ((WeakHandle)result).resolve() -+void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -+ assert_different_registers(result, tmp); -+ Label resolved; ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ // A null weak handle resolves to null. -+ beqz(result, resolved); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+ // Only 64 bit platforms support GCs that require a tmp register -+ // Only IN_HEAP loads require a thread_tmp register -+ // WeakHandle::resolve is an indirection like jweak. -+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -+ result, Address(result), tmp, noreg /* tmp_thread */); -+ bind(resolved); -+} ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? 
(load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, -+ Register dst, Address src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+void MacroAssembler::null_check(Register reg, int offset) { -+ if (needs_explicit_null_check(offset)) { -+ // provoke OS NULL exception if reg = NULL by -+ // accessing M[reg] w/o changing any registers -+ // NOTE: this is plenty to provoke a segv -+ ld(zr, Address(reg, 0)); -+ } else { -+ // nothing to do, (later) access of M[reg + offset] -+ // will provoke OS NULL exception if reg = NULL -+ } -+} ++ Register first = tmp3; + -+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, -+ Address dst, Register src, -+ Register tmp1, Register thread_tmp) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ decorators = AccessInternal::decorator_fixup(decorators); -+ bool as_raw = (decorators & AS_RAW) != 0; -+ if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); -+ } -+} ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+// Algorithm must match CompressedOops::encode. -+void MacroAssembler::encode_heap_oop(Register d, Register s) { -+ verify_oop(s, "broken oop in encode_heap_oop"); -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, s, LogMinObjAlignmentInBytes); -+ } else { -+ mv(d, s); -+ } -+ } else { -+ Label notNull; -+ sub(d, s, xheapbase); -+ bgez(d, notNull); -+ mv(d, zr); -+ bind(notNull); -+ if (CompressedOops::shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ srli(d, d, CompressedOops::shift()); -+ } -+ } -+} ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); + -+void MacroAssembler::load_klass(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ decode_klass_not_null(dst); -+ } else { -+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+void MacroAssembler::store_klass(Register dst, Register src) { -+ // FIXME: Should this be a store release? concurrent gcs assumes -+ // klass length is valid if klass field is not null. 
-+ if (UseCompressedClassPointers) { -+ encode_klass_not_null(src); -+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } else { -+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); -+ } -+} ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+void MacroAssembler::store_klass_gap(Register dst, Register src) { -+ if (UseCompressedClassPointers) { -+ // Store to klass gap in destination -+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); -+ } -+} ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+void MacroAssembler::decode_klass_not_null(Register r) { -+ decode_klass_not_null(r, r); -+} ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ slli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); + } -+ return; -+ } -+ -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; + } + -+ assert_different_registers(src, xbase); -+ li(xbase, (uintptr_t)CompressedKlassPointers::base()); ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ assert_different_registers(t0, xbase); -+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); -+ } else { -+ add(dst, xbase, src); ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); + } + -+ if (xbase == xheapbase) { reinit_heapbase(); } -+} ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::encode_klass_not_null(Register r) { -+ encode_klass_not_null(r, r); -+} ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ 
j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } + -+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { -+ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ if (CompressedKlassPointers::base() == NULL) { -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, src, LogKlassAlignmentInBytes); -+ } else { -+ mv(dst, src); ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); + } -+ return; -+ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -+ CompressedKlassPointers::shift() == 0) { -+ zero_extend(dst, src, 32); -+ return; -+ } ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ Register xbase = dst; -+ if (dst == src) { -+ xbase = tmp; -+ } ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ assert_different_registers(src, xbase); -+ li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ sub(dst, src, xbase); -+ if (CompressedKlassPointers::shift() != 0) { -+ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ srli(dst, dst, LogKlassAlignmentInBytes); -+ } -+ if (xbase == xheapbase) { -+ reinit_heapbase(); ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); + } -+} + -+void MacroAssembler::decode_heap_oop_not_null(Register r) { -+ decode_heap_oop_not_null(r, r); -+} ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { -+ assert(UseCompressedOops, "should only be used for compressed headers"); -+ assert(Universe::heap() != NULL, "java heap should be initialized"); -+ // Cannot assert, unverified entry point counts instructions (see .ad file) -+ // vtableStubs also counts instructions in pd_code_size_limit. -+ // Also do not verify_oop as this is called by verify_oop. 
-+ if (CompressedOops::shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ slli(dst, src, LogMinObjAlignmentInBytes); -+ if (CompressedOops::base() != NULL) { -+ add(dst, xheapbase, dst); ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); + } -+ } else { -+ assert(CompressedOops::base() == NULL, "sanity"); -+ mv(dst, src); -+ } -+} ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (CompressedOops::base() == NULL) { -+ if (CompressedOops::shift() != 0 || d != s) { -+ slli(d, s, CompressedOops::shift()); -+ } -+ } else { -+ Label done; -+ mv(d, s); -+ beqz(s, done); -+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); -+ bind(done); ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); + } -+ verify_oop(d, "broken oop in decode_heap_oop"); -+} -+ -+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} + -+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); -+} ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, -+ Register thread_tmp, DecoratorSet decorators) { -+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); -+} ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); + -+// Used for storing NULLs. -+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++ bind(DONE); +} + -+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder) ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) +{ -+ // Full implementation of Java idiv and irem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. 
-+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+ int idivl_offset = offset(); -+ if (!want_remainder) { -+ divw(result, rs1, rs2); -+ } else { -+ remw(result, rs1, rs2); // result = rs1 % rs2; -+ } -+ return idivl_offset; -+} ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder) -+{ -+ // Full implementation of Java ldiv and lrem. The function -+ // returns the (pc) offset of the div instruction - may be needed -+ // for implicit exceptions. -+ // -+ // input : rs1: dividend -+ // rs2: divisor -+ // -+ // result: either -+ // quotient (= rs1 idiv rs2) -+ // remainder (= rs1 irem rs2) ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+ int idivq_offset = offset(); -+ if (!want_remainder) { -+ div(result, rs1, rs2); -+ } else { -+ rem(result, rs1, rs2); // result = rs1 % rs2; ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } -+ return idivq_offset; -+} + -+// Look up the method for a megamorpic invkkeinterface call. -+// The target method is determined by . -+// The receiver klass is in recv_klass. -+// On success, the result will be in method_result, and execution falls through. -+// On failure, execution transfers to the given label. -+void MacroAssembler::lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& L_no_such_interface, -+ bool return_method) { -+ assert_different_registers(recv_klass, intf_klass, scan_tmp); -+ assert_different_registers(method_result, intf_klass, scan_tmp); -+ assert(recv_klass != method_result || !return_method, -+ "recv_klass can be destroyed when mehtid isn't needed"); -+ assert(itable_index.is_constant() || itable_index.as_register() == method_result, -+ "caller must be same register for non-constant itable index as for method"); ++ // Compute the minimum of the string lengths and save the difference in result. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+ // Compute start of first itableOffsetEntry (which is at the end of the vtable). 
-+ int vtable_base = in_bytes(Klass::vtable_start_offset()); -+ int itentry_off = itableMethodEntry::method_offset_in_bytes(); -+ int scan_step = itableOffsetEntry::size() * wordSize; -+ int vte_size = vtableEntry::size_in_bytes(); -+ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ // A very short string ++ mv(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset())); ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + -+ // %%% Could store the aligned, prescaled offset in the klassoop. -+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3); -+ add(scan_tmp, scan_tmp, vtable_base); ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+ if (return_method) { -+ // Adjust recv_klass by scaled itable_index, so we can free itable_index. -+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); -+ if (itable_index.is_register()) { -+ slli(t0, itable_index.as_register(), 3); -+ } else { -+ li(t0, itable_index.as_constant() << 3); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. 
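++    // str1 and str2 were advanced during initialization so that a plain load
++    // through them picks up the final chunk of each string; re-comparing a few
++    // bytes that were already checked is cheaper than a byte-wise tail loop.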
++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); + } -+ add(recv_klass, recv_klass, t0); -+ if (itentry_off) { -+ add(recv_klass, recv_klass, itentry_off); ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); + } ++ sub(result, tmp1, tmp2); ++ j(DONE); + } + -+ Label search, found_method; -+ -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ beq(intf_klass, method_result, found_method); -+ bind(search); -+ // Check that the previous entry is non-null. A null entry means that -+ // the receiver class doens't implement the interface, and wasn't the -+ // same as when the caller was compiled. -+ beqz(method_result, L_no_such_interface, /* is_far */ true); -+ addi(scan_tmp, scan_tmp, scan_step); -+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes())); -+ bne(intf_klass, method_result, search); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+ bind(found_method); ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + -+ // Got a hit. 
-+ if (return_method) { -+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); -+ add(method_result, recv_klass, scan_tmp); -+ ld(method_result, Address(method_result)); -+ } -+} ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+// virtual method calling -+void MacroAssembler::lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result) { -+ const int base = in_bytes(Klass::vtable_start_offset()); -+ assert(vtableEntry::size() * wordSize == 8, -+ "adjust the scaling in the code below"); -+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++ bind(DONE); + -+ if (vtable_index.is_register()) { -+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); -+ ld(method_result, Address(method_result, vtable_offset_in_bytes)); -+ } else { -+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); -+ } ++ BLOCK_COMMENT("} string_compare"); +} + -+void MacroAssembler::membar(uint32_t order_constraint) { -+ address prev = pc() - NativeMembar::instruction_size; -+ address last = code()->last_insn(); ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { -+ NativeMembar *bar = NativeMembar_at(prev); -+ // We are merging two memory barrier instructions. On RISCV we -+ // can do this simply by ORing them together. -+ bar->set_kind(bar->get_kind() | order_constraint); -+ BLOCK_COMMENT("merged membar"); -+ } else { -+ code()->set_last_insn(pc()); ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ mv(elem_per_word, wordSize / elem_size); + -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ BLOCK_COMMENT("arrays_equals {"); + -+ membar_mask_to_pred_succ(order_constraint, predecessor, successor); -+ fence(predecessor, successor); -+ } -+} ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+// Form an addres from base + offset in Rd. Rd my or may not -+// actually be used: you must use the Address that is returned. It -+// is up to you to ensure that the shift provided mathces the size -+// of your data. 
-+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { -+ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 -+ return Address(base, byte_offset); -+ } ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+ // Do it the hard way -+ mv(Rd, byte_offset); -+ add(Rd, base, Rd); -+ return Address(Rd); -+} ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+void MacroAssembler::check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success) { -+ Label L_failure; -+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); -+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); -+ bind(L_failure); -+} ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+ ld(t0, Address(xthread, JavaThread::polling_word_offset())); -+ if (acquire) { -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ } -+ if (at_return) { -+ bgtu(in_nmethod ? 
sp : fp, t0, slow_path, true /* is_far */); -+ } else { -+ andi(t0, t0, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path, true /* is_far */); -+ } -+} ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, -+ Label &succeed, Label *fail) { -+ // oldv holds comparison value -+ // newv holds value to write in exchange -+ // addr identifies memory word to compare against/update -+ Label retry_load, nope; -+ bind(retry_load); -+ // Load reserved from the memory location -+ lr_d(tmp, addr, Assembler::aqrl); -+ // Fail and exit if it is not what we expect -+ bne(tmp, oldv, nope); -+ // If the store conditional succeeds, tmp will be zero -+ sc_d(tmp, newv, addr, Assembler::rl); -+ beqz(tmp, succeed); -+ // Retry only when the store conditional failed -+ j(retry_load); ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); + -+ bind(nope); -+ membar(AnyAny); -+ mv(oldv, tmp); -+ if (fail != NULL) { -+ j(*fail); -+ } -+} ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); + -+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, -+ Label &succeed, Label *fail) { -+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); -+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); -+} ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); + -+void MacroAssembler::load_reserved(Register addr, -+ enum operand_size size, -+ Assembler::Aqrl acquire) { -+ switch (size) { -+ case int64: -+ lr_d(t0, addr, acquire); -+ break; -+ case int32: -+ lr_w(t0, addr, acquire); -+ break; -+ case uint32: -+ lr_w(t0, addr, acquire); -+ zero_extend(t0, t0, 32); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); + -+void MacroAssembler::store_conditional(Register addr, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl release) { -+ switch (size) { -+ case int64: -+ sc_d(t0, new_val, addr, release); -+ break; -+ case int32: -+ case uint32: -+ sc_w(t0, new_val, addr, release); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++ BLOCK_COMMENT("} array_equals"); +} + ++// Compare Strings + -+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3) { -+ assert(size == int8 || size == int16, "unsupported operand size"); ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. 
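++// Note that cnt1 is a byte count here: the main loop below subtracts wordSize
++// from it, and the tail masks it with 4, then 2, then (for 1-byte elements only) 1.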
+ -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+ andi(shift, addr, 3); -+ slli(shift, shift, 3); ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+ andi(aligned_addr, addr, ~3); ++ BLOCK_COMMENT("string_equals {"); + -+ if (size == int8) { -+ addi(mask, zr, 0xff); -+ } else { -+ // size == int16 case -+ addi(mask, zr, -1); -+ zero_extend(mask, mask, 16); -+ } -+ sll(mask, mask, shift); ++ mv(result, false); + -+ xori(not_mask, mask, -1); ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ sll(expected, expected, shift); -+ andr(expected, expected, mask); ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); + -+ sll(new_val, new_val, shift); -+ andr(new_val, new_val, mask); -+} ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); + -+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. -+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, -+// which are forced to work with 4-byte aligned address. -+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); -+ -+ Label retry, fail, done; -+ -+ bind(retry); -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); -+ -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, retry); -+ -+ if (result_as_bool) { -+ addi(result, zr, 1); -+ j(done); -+ -+ bind(fail); -+ mv(result, zr); ++ bind(SHORT); ++ Label TAIL03, TAIL01; + -+ bind(done); -+ } else { -+ andr(tmp, old, mask); ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } + -+ bind(fail); -+ srl(result, tmp, shift); ++ bind(TAIL03); ++ // 0-3 bytes left. 
++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else { -+ // size == int16 case -+ sign_extend(result, result, 16); ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); + } + } -+} -+ -+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement -+// the weak CAS stuff. The major difference is that it just failed when store conditional -+// failed. -+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3) { -+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; -+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); -+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ Label succ, fail, done; ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); + -+ lr_w(old, aligned_addr, acquire); -+ andr(tmp, old, mask); -+ bne(tmp, expected, fail); ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} + -+ andr(tmp, old, not_mask); -+ orr(tmp, tmp, new_val); -+ sc_w(tmp, tmp, aligned_addr, release); -+ beqz(tmp, succ); ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); + -+ bind(fail); -+ addi(result, zr, 1); -+ j(done); ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, + -+ bind(succ); -+ mv(result, zr); ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; + -+ bind(done); -+} ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+void MacroAssembler::cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool) { -+ assert(size != int8 && size != int16, 
"unsupported operand size"); ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; + -+ Label retry_load, done, ne_done; -+ bind(retry_load); -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, ne_done); -+ store_conditional(addr, new_val, size, release); -+ bnez(t0, retry_load); ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} + -+ // equal, succeed -+ if (result_as_bool) { -+ li(result, 1); -+ } else { -+ mv(result, expected); -+ } -+ j(done); ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} + -+ // not equal, failed -+ bind(ne_done); -+ if (result_as_bool) { -+ mv(result, zr); -+ } else { -+ mv(result, t0); ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ -+ bind(done); +} + -+void MacroAssembler::cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result) { -+ Label fail, done, sc_done; -+ load_reserved(addr, size, acquire); -+ bne(t0, expected, fail); -+ store_conditional(addr, new_val, size, release); -+ beqz(t0, sc_done); -+ -+ // fail -+ bind(fail); -+ li(result, 1); -+ j(done); -+ -+ // sc_done -+ bind(sc_done); -+ mv(result, 0); -+ bind(done); ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ -+ prev = prev->is_valid() ? 
prev : zr; \ -+ if (incr.is_register()) { \ -+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } else { \ -+ mv(t0, incr.as_constant()); \ -+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ } \ -+ return; \ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); +} + -+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) -+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) -+ -+#undef ATOMIC_OP ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ -+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ -+ prev = prev->is_valid() ? prev : zr; \ -+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ -+ return; \ -+} ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } + -+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) -+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) -+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++ frflags(t0); ++ beqz(t0, Done); + -+#undef ATOMIC_XCHG ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); + -+#define ATOMIC_XCHGU(OP1, OP2) \ -+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ -+ atomic_##OP2(prev, newv, addr); \ -+ zero_extend(prev, prev, 32); \ -+ return; \ ++ bind(Done); +} + -+ATOMIC_XCHGU(xchgwu, xchgw) -+ATOMIC_XCHGU(xchgalwu, xchgalw) ++#endif // COMPILER2 + -+#undef ATOMIC_XCHGU +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 0000000000..eca18b51f3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,984 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x0, tmp, offset); -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ j(entry); -+ } -+} ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { -+ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); -+ assert(CodeCache::find_blob(entry.target()) != NULL, -+ "destination of far call not found in code cache"); -+ int32_t offset = 0; -+ if (far_branches()) { -+ // We can use auipc + jalr here because we know that the total size of -+ // the code cache cannot exceed 2Gb. -+ la_patchable(tmp, entry, offset); -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jalr(x1, tmp, offset); // link -+ } else { -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ jal(entry); // link -+ } -+} ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" ++#include "nativeInst_riscv.hpp" + -+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); -+ bool must_load_sco = (super_check_offset == noreg); -+ if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a temp or a register offset"); -+ } else { -+ assert_different_registers(sub_klass, super_klass, super_check_offset); -+ } ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } -+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } -+ assert(label_nulls <= 1, "at most one NULL in batch"); ++class MacroAssembler: public Assembler { + -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ Address super_check_offset_addr(super_klass, sco_offset); ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} + -+ // Hacked jmp, which may only be used just before L_fallthrough. 
-+#define final_jmp(label) \ -+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ -+ else j(label) /*omit semi*/ ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + -+ // If the pointers are equal, we are done (e.g., String[] elements). -+ // This self-check enables sharing of secondary supertype arrays among -+ // non-primary types such as array-of-interface. Otherwise, each such -+ // type would need its own customized SSA. -+ // We move this check to the front fo the fast path because many -+ // type checks are in fact trivially successful in this manner, -+ // so we get a nicely predicted branch right at the start of the check. -+ beq(sub_klass, super_klass, *L_success); ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); + -+ // Check the supertype display: -+ if (must_load_sco) { -+ lwu(tmp_reg, super_check_offset_addr); -+ super_check_offset = tmp_reg; ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); + } -+ add(t0, sub_klass, super_check_offset); -+ Address super_check_addr(t0); -+ ld(t0, super_check_addr); // load displayed supertype -+ -+ // Ths check has worked decisively for primary supers. -+ // Secondary supers are sought in the super_cache ('super_cache_addr'). -+ // (Secondary supers are interfaces and very deeply nested subtypes.) -+ // This works in the same check above because of a tricky aliasing -+ // between the super_Cache and the primary super dispaly elements. -+ // (The 'super_check_addr' can address either, as the case requires.) -+ // Note that the cache is updated below if it does not help us find -+ // what we need immediately. -+ // So if it was a primary super, we can just fail immediately. -+ // Otherwise, it's the slow path for us (no success at this point). 
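As an illustration of the comments above, the fast path boils down to the following C++ sketch (hypothetical stand-ins only: Outcome, sub_slots and the two offset parameters model the registers and Klass offsets used by the generated code, and word-slot indexing is used instead of byte offsets for brevity):

// Hypothetical, simplified model of the fast path above; not HotSpot API.
enum Outcome { SUCCESS, FAILURE, SLOW_PATH };

Outcome fast_path(const void* sub_klass, const void* super_klass,
                  const void* const* sub_slots,        // sub klass viewed as word slots
                  int super_check_offset,              // loaded from the super klass
                  int secondary_super_cache_offset) {  // slot holding the cache entry
  if (sub_klass == super_klass) {
    return SUCCESS;                                    // beq(sub_klass, super_klass, L_success)
  }
  // ld(t0, Address(sub_klass, super_check_offset)): displayed supertype or cache entry.
  const void* displayed = sub_slots[super_check_offset];
  if (displayed == super_klass) {
    return SUCCESS;                                    // primary display hit, or cache hit
  }
  // A miss is only inconclusive when the slot examined was the secondary-super cache.
  return (super_check_offset == secondary_super_cache_offset) ? SLOW_PATH : FAILURE;
}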
+ -+ beq(super_klass, t0, *L_success); -+ mv(t1, sc_offset); -+ if (L_failure == &L_fallthrough) { -+ beq(super_check_offset, t1, *L_slow_path); -+ } else { -+ bne(super_check_offset, t1, *L_failure, /* is_far */ true); -+ final_jmp(*L_slow_path); ++ // Alignment ++ void align(int modulus, int extra_offset = 0); ++ static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) { ++ assert(is_aligned(pc, alignment), "bad alignment"); + } + -+ bind(L_fallthrough); -+ -+#undef final_jmp -+} -+ -+// Scans count pointer sized words at [addr] for occurence of value, -+// generic -+void MacroAssembler::repne_scan(Register addr, Register value, Register count, -+ Register tmp) { -+ Label Lloop, Lexit; -+ beqz(count, Lexit); -+ bind(Lloop); -+ ld(tmp, addr); -+ beq(value, tmp, Lexit); -+ add(addr, addr, wordSize); -+ sub(count, count, 1); -+ bnez(count, Lloop); -+ bind(Lexit); -+} -+ -+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg); -+ if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happen asynchronously. ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); + } -+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + -+ Label L_fallthrough; -+ int label_nulls = 0; -+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } -+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } + -+ assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of usefule fields in sub_klass: -+ int ss_offset = in_bytes(Klass::secondary_supers_offset()); -+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); -+ Address secondary_supers_addr(sub_klass, ss_offset); -+ Address super_cache_addr( sub_klass, sc_offset); ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); + -+ BLOCK_COMMENT("check_klass_subtype_slow_path"); ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + -+ // Do a linear scan of the secondary super-klass chain. -+ // This code is rarely used, so simplicity is a virtue here. -+ // The repne_scan instruction uses fixed registers, which we must spill. -+ // Don't worry too much about pre-existing connecitons with the input regs. 
++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) -+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); + -+ RegSet pushed_registers; -+ if (!IS_A_TEMP(x12)) { -+ pushed_registers += x12; -+ } -+ if (!IS_A_TEMP(x15)) { -+ pushed_registers += x15; -+ } ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); + -+ if (super_klass != x10 || UseCompressedOops) { -+ if (!IS_A_TEMP(x10)) { -+ pushed_registers += x10; -+ } -+ } ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); + -+ push_reg(pushed_registers, sp); ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); + -+ // Get super_klass value into x10 (even if it was in x15 or x12) -+ mv(x10, super_klass); ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); + -+#ifndef PRODUCT -+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); -+ Address pst_counter_addr(t1); -+ ld(t0, pst_counter_addr); -+ add(t0, t0, 1); -+ sd(t0, pst_counter_addr); -+#endif // PRODUCT ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); + -+ // We will consult the secondary-super array. -+ ld(x15, secondary_supers_addr); -+ // Load the array length. 
-+ lwu(x12, Address(x15, Array::length_offset_in_bytes())); -+ // Skip to start of data. -+ add(x15, x15, Array::base_offset_in_bytes()); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); + -+ // Set t0 to an obvious invalid value, falling through by default -+ li(t0, -1); -+ // Scan X12 words at [X15] for an occurrence of X10. -+ repne_scan(x15, x10, x12, t0); ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } + -+ // pop will restore x10, so we should use a temp register to keep its value -+ mv(t1, x10); ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); + -+ // Unspill the temp registers: -+ pop_reg(pushed_registers, sp); ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); + -+ bne(t1, t0, *L_failure); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); + -+ // Success. Cache the super we found an proceed in triumph. 
-+ sd(super_klass, super_cache_addr); ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); + -+ if (L_success != &L_fallthrough) { -+ j(*L_success); -+ } ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); + -+#undef IS_A_TEMP ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register thread_tmp); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); + -+ bind(L_fallthrough); -+} ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + -+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. -+void MacroAssembler::tlab_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp1, -+ Register tmp2, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); -+} ++ void store_klass_gap(Register dst, Register src); + -+// Defines obj, preserves var_size_in_bytes -+void MacroAssembler::eden_allocate(Register obj, -+ Register var_size_in_bytes, -+ int con_size_in_bytes, -+ Register tmp, -+ Label& slow_case, -+ bool is_far) { -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); -+} ++ // currently unimplemented ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); + -+// get_thread() can be called anywhere inside generated code so we -+// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code. 
-+void MacroAssembler::get_thread(Register thread) { -+ // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + -+ RegSet::range(x28, x31) + ra - thread; -+ push_reg(saved_regs, sp); ++ // This dummy is to prevent a call to store_heap_oop from ++ // converting a zero (linke NULL) into a Register by giving ++ // the compiler two choices it can't resolve + -+ int32_t offset = 0; -+ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); -+ jalr(ra, ra, offset); -+ if (thread != x10) { -+ mv(thread, x10); -+ } ++ void store_heap_oop(Address dst, void* dummy); + -+ // restore pushed registers -+ pop_reg(saved_regs, sp); -+} ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generateion is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). + -+void MacroAssembler::load_byte_map_base(Register reg) { -+ CardTable::CardValue* byte_map_base = -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ li(reg, (uint64_t)byte_map_base); -+} ++ virtual void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + -+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { -+ relocInfo::relocType rtype = dest.rspec().reloc()->type(); -+ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); -+ unsigned long high_address = (uintptr_t)CodeCache::high_bound(); -+ unsigned long dest_address = (uintptr_t)dest.target(); -+ long offset_low = dest_address - low_address; -+ long offset_high = dest_address - high_address; ++ // idiv variant which deals with MINLONG as dividend and -1 as divisor ++ int corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder); ++ int corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder); + -+ assert(is_valid_riscv64_address(dest.target()), "bad address"); -+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& no_such_interface, ++ bool return_method = true); + -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), dest.rspec()); -+ // RISC-V doesn't compute a page-aligned address, in order to partially -+ // compensate for the use of *signed* offsets in its base+disp12 -+ // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2k). -+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { -+ int64_t distance = dest.target() - pc(); -+ auipc(reg1, (int32_t)distance + 0x800); -+ offset = ((int32_t)distance << 20) >> 20; -+ } else { -+ movptr_with_offset(reg1, dest.target(), offset); -+ } -+} ++ // virtual method calling ++ // n.n. 
x86 allows RegisterOrConstant for vtable_index ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); + -+void MacroAssembler::build_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ sub(sp, sp, framesize); -+ sd(fp, Address(sp, framesize - 2 * wordSize)); -+ sd(ra, Address(sp, framesize - wordSize)); -+ if (PreserveFramePointer) { add(fp, sp, framesize); } -+ verify_cross_modify_fence_not_required(); -+} ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset); + -+void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize >= 2, "framesize must include space for FP/RA"); -+ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); -+ ld(fp, Address(sp, framesize - 2 * wordSize)); -+ ld(ra, Address(sp, framesize - wordSize)); -+ add(sp, sp, framesize); -+} ++ // allocation ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails ++ bool is_far = false ++ ); + -+void MacroAssembler::reserved_stack_check() { -+ // testing if reserved zone needs to be enabled -+ Label no_reserved_zone_enabling; ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); + -+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); -+ bltu(sp, t0, no_reserved_zone_enabling); ++ // Test sub_klass against super_klass, with fast and slow paths. + -+ enter(); // RA and FP are live. -+ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); -+ jalr(x1, t0, offset); -+ leave(); ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); + -+ // We have already removed our own frame. -+ // throw_delayed_StackOverflowError will think that it's been -+ // called by our caller. -+ offset = 0; -+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); -+ jalr(x0, t0, offset); -+ should_not_reach_here(); ++ // The reset of the type cehck; must be wired to a corresponding fast path. 
++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. ++ // Updates the sub's secondary super cache as necessary. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp1_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); + -+ bind(no_reserved_zone_enabling); -+} ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); + -+// Move the address of the polling page into dest. -+void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -+ ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+} ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + -+// Read the polling page. The address of the polling page must -+// already be in r. -+address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ address mark; -+ { -+ InstructionMark im(this); -+ code_section()->relocate(inst_mark(), rtype); -+ lwu(zr, Address(r, offset)); -+ mark = inst_mark(); -+ } -+ verify_cross_modify_fence_not_required(); -+ return mark; -+} ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -+#ifdef ASSERT -+ { -+ ThreadInVMfromUnknown tiv; -+ assert (UseCompressedOops, "should only be used for compressed oops"); -+ assert (Universe::heap() != NULL, "java heap should be initialized"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ } -+#endif -+ int oop_index = oop_recorder()->find_index(obj); -+ InstructionMark im(this); -+ RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(inst_mark(), rspec); -+ li32(dst, 0xDEADBEEF); -+ zero_extend(dst, dst, 32); -+} ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} + -+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { -+ assert (UseCompressedClassPointers, "should only be used for compressed headers"); -+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in(k), "should not be an oop"); ++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + -+ InstructionMark im(this); -+ RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(inst_mark(), rspec); -+ narrowKlass nk = CompressedKlassPointers::encode(k); -+ li32(dst, nk); -+ zero_extend(dst, dst, 32); -+} ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. 
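As a side note, the four-bit encoding described above (and defined concretely by the Membar_mask_bits enum and membar_mask_to_pred_succ just below) can be checked in isolation. This is a minimal standalone sketch of the same bit manipulation, with conservative_fence standing in for the UseConservativeFence flag:

#include <cassert>
#include <cstdint>

// Sketch only: bits 3..2 of the mask hold the predecessor (r/w) bits,
// bits 1..0 hold the successor (r/w) bits.
static void mask_to_pred_succ(uint32_t order_constraint, bool conservative_fence,
                              uint32_t& pred, uint32_t& succ) {
  pred = (order_constraint >> 2) & 0x3;
  succ = order_constraint & 0x3;
  if (conservative_fence) {   // widen w -> ow and r -> ir, i.e. rw -> iorw
    pred |= pred << 2;
    succ |= succ << 2;
  }
}

int main() {
  uint32_t pred = 0, succ = 0;
  mask_to_pred_succ(0b0110 /* StoreLoad */, false, pred, succ);
  assert(pred == 0b01 && succ == 0b10);       // pred = w, succ = r
  mask_to_pred_succ(0b0110 /* StoreLoad */, true, pred, succ);
  assert(pred == 0b0101 && succ == 0b1010);   // pred = ow, succ = ir
  return 0;
}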
++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; + -+// Maybe emit a call via a trampoline. If the code cache is small -+// trampolines won't be emitted. -+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { -+ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); -+ assert(entry.rspec().type() == relocInfo::runtime_call_type || -+ entry.rspec().type() == relocInfo::opt_virtual_call_type || -+ entry.rspec().type() == relocInfo::static_call_type || -+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ void membar(uint32_t order_constraint); + -+ // We need a trampoline if branches are far. -+ if (far_branches()) { -+ bool in_scratch_emit_size = false; -+#ifdef COMPILER2 -+ // We don't want to emit a trampoline if C2 is generating dummy -+ // code during its branch shortening phase. -+ CompileTask* task = ciEnv::current()->task(); -+ in_scratch_emit_size = -+ (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->output()->in_scratch_emit_size()); -+#endif -+ if (!in_scratch_emit_size) { -+ address stub = emit_trampoline_stub(offset(), entry.target()); -+ if (stub == NULL) { -+ postcond(pc() == badAddress); -+ return NULL; // CodeCache is full -+ } ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; ++ ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; + } + } + -+ if (cbuf != NULL) { cbuf->set_insts_mark(); } -+ relocate(entry.rspec()); -+ if (!far_branches()) { -+ jal(entry.target()); -+ } else { -+ jal(pc()); ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); + } -+ // just need to return a non-null address -+ postcond(pc() != badAddress); -+ return pc(); -+} + -+address MacroAssembler::ic_call(address entry, jint method_index) { -+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -+ movptr(t1, (address)Universe::non_oop_word()); -+ assert_cond(entry != NULL); -+ return trampoline_call(Address(entry, rh)); -+} ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); + -+// Emit a trampoline stub for a call to a target which is too far away. 
-+// -+// code sequences: -+// -+// call-site: -+// branch-and-link to or -+// -+// Related trampoline stub for this call site in the stub section: -+// load the call target from the constant pool -+// branch (RA still points to the call site above) ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, -+ address dest) { -+ address stub = start_a_stub(NativeInstruction::instruction_size -+ + NativeCallTrampolineStub::instruction_size); -+ if (stub == NULL) { -+ return NULL; // CodeBuffer::expand failed -+ } ++ void unimplemented(const char* what = ""); + -+ // Create a trampoline stub relocation which relates this trampoline stub -+ // with the call instruction at insts_call_instruction_offset in the -+ // instructions code-section. ++ void should_not_reach_here() { stop("should not reach here"); } + -+ // make sure 4 byte aligned here, so that the destination address would be -+ // 8 byte aligned after 3 intructions -+ // when we reach here we may get a 2-byte alignment so need to align it -+ align(wordSize, NativeCallTrampolineStub::data_offset); ++ static address target_addr_for_insn(address insn_addr); + -+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + -+ insts_call_instruction_offset)); -+ const int stub_start_offset = offset(); ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { ++ pd_patch_instruction_size(branch, target); ++ } ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); ++ } + -+ // Now, create the trampoline stub's code: -+ // - load the call -+ // - call -+ Label target; -+ ld(t0, target); // auipc + ld -+ jr(t0); // jalr -+ bind(target); -+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, -+ "should be"); -+ assert(offset() % wordSize == 0, "bad alignment"); -+ emit_int64((intptr_t)dest); ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); + -+ const address stub_start_addr = addr_at(stub_start_offset); ++ // The following 4 methods return the offset of the appropriate move instruction + -+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); + -+ end_a_stub(); -+ return stub_start_addr; -+} ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); + -+Address MacroAssembler::add_memory_helper(const Address dst) { -+ switch (dst.getMode()) { -+ case Address::base_plus_offset: -+ // This is the expected mode, although we allow all the other -+ // forms below. 
-+ return form_address(t1, dst.base(), dst.offset()); -+ default: -+ la(t1, dst); -+ return Address(t1); -+ } -+} ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + -+void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ ld(t0, adr); -+ addi(t0, t0, imm); -+ sd(t0, adr); -+} ++ public: ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs); ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero + -+void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { -+ Address adr = add_memory_helper(dst); -+ assert_different_registers(adr.base(), t0); -+ lwu(t0, adr); -+ addiw(t0, t0, imm); -+ sw(t0, adr); -+} ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); + -+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { -+ assert_different_registers(src1, t0); -+ int32_t offset; -+ la_patchable(t0, src2, offset); -+ ld(t0, Address(t0, offset)); -+ beq(src1, t0, equal); -+} ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); + -+void MacroAssembler::load_method_holder_cld(Register result, Register method) { -+ load_method_holder(result, method); -+ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); -+} ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); + -+void MacroAssembler::load_method_holder(Register 
holder, Register method) { -+ ld(holder, Address(method, Method::const_offset())); // ConstMethod* -+ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -+ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* -+} ++ void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zeros); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zeros, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); -+} ++private: ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns and corresponding indices would be like: -+// - 0x8080808080808080 (Latin1) -+// - 7 6 5 4 3 2 1 0 (match index) -+// - 0x8000800080008000 (UTF16) -+// - 3 2 1 0 (match index) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); -+} ++public: ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) pop_reg(regs.bits(), stack); } + -+#ifdef COMPILER2 -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+// tmp = x28 -+// -+// pseudo code from java implementation: -+// long kLong = k & LONG_MASK; -+// carry = 0; -+// offset = out.length-offset - 1; -+// for (int j = len - 1; j >= 0; j--) { -+// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; -+// out[offset--] = (int)product; -+// carry = product >>> 32; -+// } -+// return (int)carry; -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp) { -+ Label L_tail_loop, L_unroll, L_end; -+ mv(tmp, out); -+ mv(out, zr); -+ blez(len, L_end); -+ zero_extend(k, k, 32); -+ slliw(t0, offset, LogBytesPerInt); -+ add(offset, tmp, t0); -+ slliw(t0, len, LogBytesPerInt); -+ add(in, in, t0); ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. 
++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); + -+ const int unroll = 8; -+ li(tmp, unroll); -+ blt(len, tmp, L_tail_loop); -+ bind(L_unroll); -+ for (int i = 0; i < unroll; i++) { -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); + } -+ subw(len, len, tmp); -+ bge(len, tmp, L_unroll); -+ -+ bind(L_tail_loop); -+ blez(len, L_end); -+ sub(in, in, BytesPerInt); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, BytesPerInt); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset, 0)); -+ srli(out, t0, 32); -+ subw(len, len, 1); -+ j(L_tail_loop); + -+ bind(L_end); -+} ++ void pusha(); ++ void popa(); ++ void push_CPU_state(); ++ void pop_CPU_state(); + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); -+} ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); ++ } + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); -+} ++ // mv ++ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + -+/** -+ * Multiply 32 bit by 32 bit first loop. 
-+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // long product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + -+ Label L_first_loop, L_first_loop_exit; -+ blez(idx, L_first_loop_exit); ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, RegisterOrConstant src); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ subw(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ bgtz(idx, L_first_loop); ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ bind(L_first_loop_exit); -+} ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); + -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; ++ } + -+ bind(L_first_loop); -+ subw(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ subw(idx, idx, 1); -+ bltz(idx, L_one_y); ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. 
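For context on the auipc + jalr pair counted by far_branch_size() below: a 32-bit PC-relative distance (covering the roughly +-2 GB code cache range) is split into a rounded upper 20-bit part for auipc and a sign-extended low 12-bit part for the following jalr or ld, mirroring the arithmetic used by la_patchable(). The sketch below is illustrative only; split_offset and the sample values are not HotSpot code:

#include <cassert>
#include <cstdint>

// Split a PC-relative distance into the auipc contribution (hi20 << 12)
// and the signed 12-bit displacement (lo12) of the following instruction.
static void split_offset(int32_t distance, int32_t& hi20, int32_t& lo12) {
  lo12 = distance & 0xfff;
  if (lo12 >= 0x800) {
    lo12 -= 0x1000;                                      // sign-extend the low 12 bits
  }
  hi20 = (int32_t)(((int64_t)distance + 0x800) >> 12);   // the "distance + 0x800" rounding
}

int main() {
  const int32_t samples[] = { 0, 0x7ff, 0x800, -0x800, 0x12345678, -0x12345678 };
  for (int32_t d : samples) {
    int32_t hi = 0, lo = 0;
    split_offset(d, hi, lo);
    // auipc contributes hi << 12; the jalr/ld displacement contributes lo.
    assert((int64_t)hi * 4096 + lo == (int64_t)d);
  }
  return 0;
}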
++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; ++ } ++ } + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ void load_byte_map_base(Register reg); + -+ subw(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); ++ } + -+ j(L_first_loop); ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); ++ } + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+ bind(L_first_loop_exit); -+} ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); + -+/** -+ * Multiply 128 bit by 128 bit. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) -+{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // ++ void reserved_stack_check(); + -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); + -+ srliw(jdx, idx, 2); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + -+ bind(L_third_loop); ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); ++ address ic_call(address entry, jint method_index = 0); + -+ subw(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ subw(idx, idx, 4); ++ // Support for memory inc/dec ++ // n.b. increment/decrement calls with an Address destination will ++ // need to use a scratch register to load the value to be ++ // incremented. 
increment/decrement calls which add or subtract a ++ // constant value other than sign-extended 12-bit immediate will need ++ // to use a 2nd scratch register to hold the constant. so, an address ++ // increment/decrement may trash both t0 and t1. + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ void increment(const Address dst, int64_t value = 1); ++ void incrementw(const Address dst, int32_t value = 1); + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ void decrement(const Address dst, int64_t value = 1); ++ void decrementw(const Address dst, int32_t value = 1); + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++ void cmpptr(Register src1, Address src2, Label& equal); + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); ++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++ void zero_words(Register base, u_int64_t cnt); ++ address zero_words(Register ptr, Register cnt); ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_memory(Register addr, Register len, Register tmp); + -+ 
ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++ // shift left by shamt and add ++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+ j(L_third_loop); ++ // Here the float instructions with safe deal with some exceptions. ++ // e.g. convert from NaN, +Inf, -Inf to int, float, double ++ // will trigger exception, we need to deal with these situations ++ // to get correct results. ++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); + -+ bind(L_third_loop_exit); ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } ++ } + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } + -+ Label L_check_1; -+ subw(idx, idx, 2); -+ bltz(idx, L_check_1); ++ static const int zero_words_block_size; + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ // compare src1 and src2 and get -1/0/1 in dst. 
++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + -+ bind(L_check_1); ++private: + -+ andi(idx, idx, 0x1); -+ subw(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++#ifdef ASSERT ++ // Macro short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) ++#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++ // Return true if an address is within the 48-bit RISCV64 address space. ++ bool is_valid_riscv64_address(address addr) { ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; ++ } + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); + -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+ bind(L_post_third_loop_done); -+} ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+/** -+ * Code for BigInteger::multiplyToLen() intrinsic. 
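A minimal C++ model of the -1/0/1 convention documented for cmp_l2i above (illustrative host code, not part of the patch; the tmp register and the dst-aliasing cases the assembler version handles are ignored, and cmp_l2i_model is an invented name):

#include <cassert>
#include <cstdint>

// Host-side sketch of cmp_l2i: signed three-way compare of two 64-bit values,
// collapsed to an int exactly as the comment above the declaration describes.
static int cmp_l2i_model(int64_t src1, int64_t src2) {
  if (src1 > src2) { return 1; }
  if (src1 < src2) { return -1; }
  return 0;
}

int main() {
  assert(cmp_l2i_model(5, 3) == 1);
  assert(cmp_l2i_model(3, 3) == 0);
  assert(cmp_l2i_model(-9, 3) == -1);
  return 0;
}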
-+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) -+{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); + -+ Label L_multiply_64_x_64_loop, L_done; ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); + -+ subw(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); + -+ const Register jdx = tmp1; ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; + -+ if (AvoidUnalignedAccesses) { -+ // Check if x and y are both 8-byte aligned. 
-+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_x_64_loop); ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); + -+ Label L_second_loop_unaligned; -+ bind(L_second_loop_unaligned); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ subw(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ blez(jdx, L_third_loop_exit); ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); + -+ bind(L_third_loop); -+ subw(jdx, jdx, 1); -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ bgtz(jdx, L_third_loop); ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } + -+ j(L_second_loop_unaligned); ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); + } + -+ bind(L_multiply_64_x_64_loop); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } + -+ Label L_second_loop_aligned; -+ beqz(kdx, L_second_loop_aligned); ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } + -+ Label L_carry; -+ subw(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ subw(kdx, kdx, 1); ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } + -+ // Second and third (nested) loops. 
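The AvoidUnalignedAccesses guard in the multiply_to_len code above (orr(t0, xlen, ylen); andi(t0, t0, 0x1); beqz(...)) only takes the 64 x 64 inner loop when both 32-bit-word lengths are even, presumably so the paired 8-byte accesses stay aligned; any odd length falls back to the 32 x 32 loop. The same predicate restated in C++ (illustrative only; use_64_x_64_loop is an invented name):

#include <cassert>

// Sketch of the alignment guard: the 64 x 64 loop is selected only when
// neither length (counted in 32-bit words) is odd.
static bool use_64_x_64_loop(int xlen, int ylen) {
  return ((xlen | ylen) & 0x1) == 0;   // orr + andi + beqz in the assembly above
}

int main() {
  assert(use_64_x_64_loop(4, 8));
  assert(!use_64_x_64_loop(4, 7));     // one odd length forces the 32 x 32 fallback
  return 0;
}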
-+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); + -+ bind(L_second_loop_aligned); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++}; + -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ subw(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 0000000000..ef968ccd96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++// Still empty. + -+ addiw(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 0000000000..3c4e8847ce +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,444 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ subw(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_aligned); ++#define __ _masm-> + -+ // Next infrequent code is moved outside loops. 
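The removed multiply_64_x_64_loop / multiply_128_x_128_loop / multiply_to_len routines in this hunk implement schoolbook BigInteger::multiplyToLen. As a reference for the arithmetic that the Java-style comments describe, here is a compact C++ model over 32-bit words (illustrative host code, not part of the patch; it ignores the 64-bit word packing, the ror_imm half-word swaps, and the unaligned fallback, and multiply_to_len_model is an invented name):

#include <cassert>
#include <cstdint>
#include <vector>

// Host-side sketch of the multiply_to_len arithmetic: schoolbook multiplication of
// x (xlen 32-bit words) by y (ylen words) into z (xlen + ylen words), most
// significant word first, mirroring the nested-loop comments above.
static void multiply_to_len_model(const std::vector<uint32_t>& x,
                                  const std::vector<uint32_t>& y,
                                  std::vector<uint32_t>& z) {
  const int xlen = (int)x.size();
  const int ylen = (int)y.size();
  z.assign(xlen + ylen, 0);
  for (int i = xlen - 1; i >= 0; i--) {                        // "second loop"
    uint64_t carry = 0;
    for (int j = ylen - 1, k = ylen + i; j >= 0; j--, k--) {   // "third loop"
      uint64_t product = (uint64_t)y[j] * x[i] + z[k] + carry;
      z[k] = (uint32_t)product;      // low 32 bits
      carry = product >> 32;         // high 32 bits carried into the next column
    }
    z[i] = (uint32_t)carry;
  }
}

int main() {
  std::vector<uint32_t> x = {0xFFFFFFFFu};
  std::vector<uint32_t> y = {0xFFFFFFFFu};
  std::vector<uint32_t> z;
  multiply_to_len_model(x, y, z);    // 0xFFFFFFFF^2 == 0xFFFFFFFE00000001
  assert(z[0] == 0xFFFFFFFEu && z[1] == 0x00000001u);
  return 0;
}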
-+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif + -+ bind(L_done); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} -+#endif + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseRVB) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 8 : 16; -+ li(Rd, -step); -+ mv(tmp2, Rs); -+ -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); -+} -+ -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ if (i) { -+ slli(tmp2, tmp2, i * 8); -+ } -+ orr(Rd, Rd, tmp2); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); -+ } -+ } -+} -+ -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ li(tmp1, 0xFF00000000); -+ mv(Rd, zr); -+ for (int i = 0; i <= 3; i++) -+ { -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); -+ srli(Rd, Rd, 8); -+ if (i != 3) { -+ slli(tmp1, tmp1, 8); -+ } -+ } ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; +} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; -+ -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. It is expanded many times -+// in compiled code, so it is important to keep it short. -+ -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. 
-+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); -+ -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); -+ { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); -+ } -+ { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); -+ } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); +} + -+#define SmallArraySize (18 * BytesPerLong) ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. -+void MacroAssembler::zero_words(Register base, u_int64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); ++#endif //ASSERT + -+ BLOCK_COMMENT("zero_words {"); ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); + -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. 
Check here for ++ // interp_only_mode if these events CAN be enabled. + -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ li(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); -+ } -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); + } + -+ BLOCK_COMMENT("} zero_words"); ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); +} + -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) -+{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... -+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end -+ -+ assert_different_registers(base, cnt, value, t0, t1); -+ -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll -+ -+ beqz(cnt, fini); ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); + -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
-+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ beq(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); + } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); -+ -+ bind(fini); -+} -+ -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ -+} -+ -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) + -+#undef FCVT_SAFE -+ -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. 
*/ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); +} + -+FCMP(float, s); -+FCMP(double, d); -+ -+#undef FCMP -+ -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { -+ assert_different_registers(addr, len, tmp, t0, t1); -+ -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ ebreak(); // empty stubs make SG sick ++ return NULL; + } -+#endif // ASSERT -+ -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT -+ -+ Label loop; -+ Label entry; + -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... 
-+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled + -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); + -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp always points to the end of the region we're about to zero -+ shadd(tmp, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp, tmp, unroll * wordSize); -+ bnez(len, loop); -+} ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseRVB) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); + } + -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); ++ // First task: Find out how big the argument list is. ++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); + } else { -+ add(Rd, Rs1, Rs2); ++ DEBUG_ONLY(argp = noreg); + } -+} + -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 16) { -+ zext_h(dst, src); -+ return; -+ } else if (bits == 32) { -+ zext_w(dst, src); -+ return; -+ } ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); + } + -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} ++ // x13_first_arg_addr is live! 
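fill_words and zero_memory above share one control-flow trick: compute cnt % 8, advance the write pointer past that remainder, then use a computed jr to jump into the middle of an 8-way unrolled store loop so the first (partial) pass emits exactly the remainder before full 8-store iterations take over. A C++ model of that structure, with a switch fallthrough standing in for the computed jump (illustrative host code, not part of the patch; the real routines store through negative offsets from a pre-advanced pointer, and fill_words_model is an invented name):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Host-side sketch of the fill_words / zero_memory loop shape: an 8-way unrolled
// store loop entered mid-body so the first pass handles cnt % 8 words.
static void fill_words_model(uint64_t* p, size_t cnt, uint64_t value) {
  if (cnt == 0) {
    return;                          // beqz(cnt, fini)
  }
  size_t passes = (cnt + 7) / 8;     // total trips through the unrolled body
  switch (cnt % 8) {                 // jr(t1): the computed entry point
    case 0: do { *p++ = value;       // every case falls through, as in the assembly
    case 7:      *p++ = value;
    case 6:      *p++ = value;
    case 5:      *p++ = value;
    case 4:      *p++ = value;
    case 3:      *p++ = value;
    case 2:      *p++ = value;
    case 1:      *p++ = value;
            } while (--passes > 0);
  }
}

int main() {
  std::vector<uint64_t> buf(11, 0);
  fill_words_model(buf.data(), buf.size(), 0x5a5a5a5a5a5a5a5aULL);
  for (uint64_t w : buf) {
    assert(w == 0x5a5a5a5a5a5a5a5aULL);
  }
  return 0;
}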
+ -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseRVB) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(recv = x12, x13_first_arg_addr); + } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); + } + -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); -+ } ++ return entry_point; +} + -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); + } + -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); + -+void MacroAssembler::safepoint_ifence() { -+ ifence(); -+#ifndef PRODUCT -+ if (VerifyCrossModifyFence) { -+ // Clear the thread state. -+ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. 
++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... 
x10 - compiler arguments (if compiled) ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // live at this point: xmethod, x30 (if interpreted) ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ } + } -+#endif ++ +} + +#ifndef PRODUCT -+void MacroAssembler::verify_cross_modify_fence_not_required() { -+ if (VerifyCrossModifyFence) { -+ // Check if thread needs a cross modify fence. -+ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -+ Label fence_not_required; -+ beqz(t0, fence_not_required); -+ // If it does then fail. 
-+ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -+ mv(c_rarg0, xthread); -+ jalr(t0); -+ bind(fence_not_required); -+ } -+} -+#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++void trace_method_handle_stub(const char* adaptername, ++ oopDesc* mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp new file mode 100644 -index 00000000000..23e09475be1 +index 0000000000..65493eba76 --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,858 @@ ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,57 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -26571,846 +25873,467 @@ index 00000000000..23e09475be1 + * + */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "metaprogramming/enableIf.hpp" -+#include "oops/compressedOops.hpp" -+#include "utilities/powerOfTwo.hpp" ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. + -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; + -+class MacroAssembler: public Assembler { ++public: + -+ public: -+ MacroAssembler(CodeBuffer* code) : Assembler(code) { -+ } -+ virtual ~MacroAssembler() {} ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + -+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + -+ // Place a fence.i after code may have been modified due to a safepoint. 
-+ void safepoint_ifence(); ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } + -+ // Alignment -+ void align(int modulus, int extra_offset = 0); ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + -+ // Stack frame creation/removal -+ // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happen asynchronously. -+ void enter() { -+ addi(sp, sp, - 2 * wordSize); -+ sd(ra, Address(sp, wordSize)); -+ sd(fp, Address(sp)); -+ addi(fp, sp, 2 * wordSize); -+ } ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); + -+ void leave() { -+ addi(sp, fp, - 2 * wordSize); -+ ld(fp, Address(sp)); -+ ld(ra, Address(sp, wordSize)); -+ addi(sp, sp, 2 * wordSize); -+ } ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +new file mode 100644 +index 0000000000..ecce8eb021 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,414 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif + -+ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) -+ // The pointer will be loaded into the thread register. 
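The enter()/leave() pair above creates and tears down the standard two-slot frame link: sp drops by 16 bytes, ra is saved at sp + 8, the caller's fp at sp + 0, and fp is then pointed just above the pair (the old sp). A small C++ model of that layout (illustrative host code, not part of the patch; FrameLink and enter_model are invented names, and wordSize is assumed to be 8 as on riscv64):

#include <cassert>
#include <cstdint>

// Host-side sketch of the frame link built by MacroAssembler::enter():
//   sp -= 2 * wordSize; [sp + wordSize] = ra; [sp] = old fp; fp = sp + 2 * wordSize;
static const intptr_t wordSize = 8;

struct FrameLink {
  intptr_t saved_fp;   // stored at sp + 0
  intptr_t saved_ra;   // stored at sp + wordSize
};

static intptr_t enter_model(intptr_t& sp, intptr_t ra, intptr_t old_fp, FrameLink& slots) {
  sp -= 2 * wordSize;
  slots.saved_ra = ra;        // sd(ra, Address(sp, wordSize))
  slots.saved_fp = old_fp;    // sd(fp, Address(sp))
  return sp + 2 * wordSize;   // addi(fp, sp, 2 * wordSize): new fp is the old sp
}

int main() {
  intptr_t sp = 0x1000;
  FrameLink slots{};
  intptr_t fp = enter_model(sp, 0x2222, 0x3333, slots);
  assert(sp == 0x1000 - 16 && fp == 0x1000);
  assert(slots.saved_ra == 0x2222 && slots.saved_fp == 0x3333);
  return 0;
}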
-+ void get_thread(Register thread); ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++} + -+ // Support for VM calls -+ // -+ // It is imperative that all calls into the VM are handled via the call_VM macros. -+ // They make sure that the stack linkage is setup correctly. call_VM's correspond -+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++} + -+ void call_VM(Register oop_result, -+ address entry_point, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} + -+ // Overloadings with last_Java_sp -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ int number_of_arguments = 0, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, -+ bool check_exceptions = true); -+ void call_VM(Register oop_result, -+ Register last_java_sp, -+ address entry_point, -+ Register arg_1, Register arg_2, Register arg_3, -+ bool check_exceptions = true); ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} + -+ void get_vm_result(Register oop_result, Register java_thread); -+ void get_vm_result_2(Register metadata_result, Register java_thread); ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_leaf_base -+ // bypassing the virtual implementation -+ void call_VM_leaf(address entry_point, -+ int number_of_arguments = 0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1); -+ void call_VM_leaf(address entry_point, -+ Register arg_0, Register arg_1, Register arg_2); ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); ++} + -+ // These always tightly bind to MacroAssembler::call_VM_base -+ // bypassing the virtual implementation -+ void super_call_VM_leaf(address entry_point, Register arg_0); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); -+ void 
super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); -+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); ++} + -+ // last Java Frame (fills frame anchor) -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); -+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); ++} + -+ // thread in the default location (xthread) -+ void reset_last_Java_frame(bool clear_fp); ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + instruction_size) && // addiw ++ check_li32_data_dependency(instr); ++} + -+ void call_native(address entry_point, -+ Register arg_0); -+ void call_native_base( -+ address entry_point, // the entry point -+ Label* retaddr = NULL -+ ); ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label* retaddr = NULL -+ ); ++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ virtual void call_VM_leaf_base( -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments to pop after the call -+ Label& retaddr) { -+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. 
++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + -+ virtual void call_VM_base( // returns the register containing the thread upon return -+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise -+ Register java_thread, // the thread if computed before ; use noreg otherwise -+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise -+ address entry_point, // the entry point -+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call -+ bool check_exceptions // whether to check for pending exceptions after return -+ ); ++ return destination; ++} + -+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); + -+ virtual void check_and_handle_earlyret(Register java_thread); -+ virtual void check_and_handle_popframe(Register java_thread); ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + -+ void resolve_weak_handle(Register result, Register tmp); -+ void resolve_oop_handle(Register result, Register tmp = x15); -+ void resolve_jobject(Register value, Register thread, Register tmp); ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } + -+ void movoop(Register dst, jobject obj, bool immediate = false); -+ void mov_metadata(Register dst, Metadata* obj); -+ void bang_stack_size(Register size, Register tmp); -+ void set_narrow_oop(Register dst, jobject obj); -+ void set_narrow_klass(Register dst, Klass* k); ++ // Patch the call. 
++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); ++ } + -+ void load_mirror(Register dst, Register method, Register tmp = x15); -+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, -+ Address src, Register tmp1, Register thread_tmp); -+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register thread_tmp); -+ void load_klass(Register dst, Register src); -+ void store_klass(Register dst, Register src); -+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++ ICache::invalidate_range(addr_call, instruction_size); ++} + -+ void encode_klass_not_null(Register r); -+ void decode_klass_not_null(Register r); -+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); -+ void decode_heap_oop_not_null(Register r); -+ void decode_heap_oop_not_null(Register dst, Register src); -+ void decode_heap_oop(Register d, Register s); -+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } -+ void encode_heap_oop(Register d, Register s); -+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; -+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); -+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); + -+ void store_klass_gap(Register dst, Register src); ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); + -+ // currently unimplemented -+ // Used for storing NULL. All other oop constants should be -+ // stored using routines that take a jobject. -+ void store_heap_oop_null(Address dst); ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; ++ } + -+ // This dummy is to prevent a call to store_heap_oop from -+ // converting a zero (linke NULL) into a Register by giving -+ // the compiler two choices it can't resolve ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } + -+ void store_heap_oop(Address dst, void* dummy); ++ return NULL; ++} + -+ // Support for NULL-checks -+ // -+ // Generates code that causes a NULL OS exception if the content of reg is NULL. -+ // If the accessed location is M[reg + offset] and the offset is known, provide the -+ // offset. No explicit code generateion is needed if the offset is within a certain -+ // range (0 <= offset <= page_size). 
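[Not part of the patch] The hunk above retargets a call directly only when the new destination is within direct JAL reach, and otherwise leaves the jal pointing at the (already re-pointed) trampoline stub. A minimal standalone sketch of that reach test, assuming the same +/-1 MiB J-type range that is_imm_in_range(offset, 20, 1) expresses; the helper name below is illustrative, not HotSpot API.

#include <cstdint>

// Sketch only: a JAL immediate is 21 bits signed (imm[20:1] plus sign bit),
// stored in multiples of 2, so a direct call reaches +/-1 MiB.
static bool jal_reachable(int64_t offset) {
  return (offset & 1) == 0 &&
         offset >= -(INT64_C(1) << 20) &&
         offset <   (INT64_C(1) << 20);
}

// Usage mirroring the patched logic:
//   if (jal_reachable(dest - addr_call)) set_destination(dest);
//   else                                 set_destination(trampoline_stub_addr);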
++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } + -+ virtual void null_check(Register reg, int offset = -1); -+ static bool needs_explicit_null_check(intptr_t offset); -+ static bool uses_implicit_null_check(void* address); ++//------------------------------------------------------------------- + -+ // idiv variant which deals with MINLONG as dividend and -1 as divisor -+ int corrected_idivl(Register result, Register rs1, Register rs2, -+ bool want_remainder); -+ int corrected_idivq(Register result, Register rs1, Register rs2, -+ bool want_remainder); ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ fatal("should be MOVPTR or AUIPC"); ++ } ++} + -+ // interface method calling -+ void lookup_interface_method(Register recv_klass, -+ Register intf_klass, -+ RegisterOrConstant itable_index, -+ Register method_result, -+ Register scan_tmp, -+ Label& no_such_interface, -+ bool return_method = true); ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; ++ } ++} + -+ // virtual method calling -+ // n.n. x86 allows RegisterOrConstant for vtable_index -+ void lookup_virtual_method(Register recv_klass, -+ RegisterOrConstant vtable_index, -+ Register method_result); ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } + -+ // Form an addres from base + offset in Rd. Rd my or may not -+ // actually be used: you must use the Address that is returned. It -+ // is up to you to ensure that the shift provided mathces the size -+ // of your data. -+ Address form_address(Register Rd, Register base, long byte_offset); ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. 
++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } ++ } ++} + -+ // allocation -+ void tlab_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails -+ bool is_far = false -+ ); ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} + -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); ++//------------------------------------------------------------------- + -+ // Test sub_klass against super_klass, with fast and slow paths. ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; ++} + -+ // The fast path produces a tri-state answer: yes / no / maybe-slow. -+ // One of the three labels can be NULL, meaning take the fall-through. -+ // If super_check_offset is -1, the value is loaded up from super_klass. -+ // No registers are killed, except tmp_reg -+ void check_klass_subtype_fast_path(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label* L_success, -+ Label* L_failure, -+ Label* L_slow_path, -+ Register super_check_offset = noreg); ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } + -+ // The reset of the type cehck; must be wired to a corresponding fast path. -+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. -+ // Updates the sub's secondary super cache as necessary. 
-+ void check_klass_subtype_slow_path(Register sub_klass, -+ Register super_klass, -+ Register tmp1_reg, -+ Register tmp2_reg, -+ Label* L_success, -+ Label* L_failure); ++void NativeMovRegMem::verify() { ++ Unimplemented(); ++} + -+ void check_klass_subtype(Register sub_klass, -+ Register super_klass, -+ Register tmp_reg, -+ Label& L_success); ++//-------------------------------------------------------------------------------- + -+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++void NativeJump::verify() { } + -+ // only if +VerifyOops -+ void verify_oop(Register reg, const char* s = "broken oop"); -+ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); + -+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} -+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++ // Patching to not_entrant can happen while activations of the method are ++ // in use. The patching in that instance must happen only when certain ++ // alignment restrictions are true. These guarantees check those ++ // conditions. + -+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ // Must be 4 bytes aligned ++ MacroAssembler::assert_alignment(verified_entry); ++} + -+ // A more convenient access to fence for our purposes -+ // We used four bit to indicate the read and write bits in the predecessors and successors, -+ // and extended i for r, o for w if UseConservativeFence enabled. -+ enum Membar_mask_bits { -+ StoreStore = 0b0101, // (pred = ow + succ = ow) -+ LoadStore = 0b1001, // (pred = ir + succ = ow) -+ StoreLoad = 0b0110, // (pred = ow + succ = ir) -+ LoadLoad = 0b1010, // (pred = ir + succ = ir) -+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) -+ }; + -+ void membar(uint32_t order_constraint); ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); + -+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { -+ predecessor = (order_constraint >> 2) & 0x3; -+ successor = order_constraint & 0x3; ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+ // extend rw -> iorw: -+ // 01(w) -> 0101(ow) -+ // 10(r) -> 1010(ir) -+ // 11(rw)-> 1111(iorw) -+ if (UseConservativeFence) { -+ predecessor |= predecessor << 2; -+ successor |= successor << 2; -+ } -+ } ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? 
(address) -1 : dest; ++ return dest; ++}; + -+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { -+ return ((predecessor & 0x3) << 2) | (successor & 0x3); -+ } ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); + -+ // prints msg, dumps registers and stops execution -+ void stop(const char* msg); ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} + -+ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++//------------------------------------------------------------------- + -+ void unimplemented(const char* what = ""); ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); + -+ void should_not_reach_here() { stop("should not reach here"); } ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about + -+ static address target_addr_for_insn(address insn_addr); ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++} + -+ // Required platform-specific helpers for Label::patch_instructions. -+ // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target); -+ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { -+ pd_patch_instruction_size(branch, target); -+ } -+ static address pd_call_destination(address branch) { -+ return target_addr_for_insn(branch); -+ } ++//------------------------------------------------------------------- + -+ static int patch_oop(address insn_addr, address o); -+ address emit_trampoline_stub(int insts_call_instruction_offset, address target); -+ void emit_static_call_stub(); ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} + -+ // The following 4 methods return the offset of the appropriate move instruction ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr ++} + -+ // Support for fast byte/short loading with zero extension (depending on particular CPU) -+ int load_unsigned_byte(Register dst, Address src); -+ int load_unsigned_short(Register dst, Address src); ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. 
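[Not part of the patch] is_lwu_to_zr() above recognizes the safepoint poll: an lwu whose destination is the zero register. A self-contained sketch of the same I-type field decode, showing the bit positions that extract_opcode/extract_funct3/extract_rd rely on; the function name is the editor's, not HotSpot's.

#include <cstdint>

// Sketch only: recognize "lwu zero, off(rs1)" from the raw 32-bit encoding.
static bool looks_like_lwu_to_zero(uint32_t insn) {
  uint32_t opcode = insn & 0x7f;         // bits 6..0  : 0b0000011 = LOAD group
  uint32_t rd     = (insn >> 7) & 0x1f;  // bits 11..7 : destination register
  uint32_t funct3 = (insn >> 12) & 0x7;  // bits 14..12: 0b110 selects LWU
  return opcode == 0b0000011 && funct3 == 0b110 && rd == 0;  // rd == x0 (zr)
}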
++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; ++} + -+ // Support for fast byte/short loading with sign extension (depending on particular CPU) -+ int load_signed_byte(Register dst, Address src); -+ int load_signed_short(Register dst, Address src); ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++} + -+ // Load and store values by size and signed-ness -+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); -+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} + -+ public: -+ // Standard pseudoinstruction -+ void nop(); -+ void mv(Register Rd, Register Rs); -+ void notr(Register Rd, Register Rs); -+ void neg(Register Rd, Register Rs); -+ void negw(Register Rd, Register Rs); -+ void sext_w(Register Rd, Register Rs); -+ void zext_b(Register Rd, Register Rs); -+ void seqz(Register Rd, Register Rs); // set if = zero -+ void snez(Register Rd, Register Rs); // set if != zero -+ void sltz(Register Rd, Register Rs); // set if < zero -+ void sgtz(Register Rd, Register Rs); // set if > zero ++//------------------------------------------------------------------- + -+ // Float pseudoinstruction -+ void fmv_s(FloatRegister Rd, FloatRegister Rs); -+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value -+ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) + -+ // Double pseudoinstruction -+ void fmv_d(FloatRegister Rd, FloatRegister Rs); -+ void fabs_d(FloatRegister Rd, FloatRegister Rs); -+ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + -+ // Pseudoinstruction for control and status register -+ void rdinstret(Register Rd); // read instruction-retired counter -+ void rdcycle(Register Rd); // read cycle counter -+ void rdtime(Register Rd); // read time -+ void csrr(Register Rd, unsigned csr); // read csr -+ void csrw(unsigned csr, Register Rs); // write csr -+ void csrs(unsigned csr, Register Rs); // set bits in csr -+ void csrc(unsigned csr, Register Rs); // clear bits in csr -+ void csrwi(unsigned csr, unsigned imm); -+ void csrsi(unsigned csr, unsigned imm); -+ void csrci(unsigned csr, unsigned imm); -+ void frcsr(Register Rd); // read float-point csr -+ void fscsr(Register Rd, Register Rs); // swap float-point csr -+ void fscsr(Register Rs); // write float-point csr -+ void frrm(Register Rd); // read float-point rounding mode -+ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode -+ void fsrm(Register Rs); // write float-point rounding mode -+ void fsrmi(Register Rd, unsigned imm); -+ void fsrmi(unsigned imm); -+ void frflags(Register Rd); // read float-point exception flags -+ void fsflags(Register Rd, Register Rs); // swap float-point exception flags -+ void fsflags(Register Rs); // write float-point exception flags -+ void fsflagsi(Register Rd, unsigned imm); -+ void fsflagsi(unsigned imm); ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + -+ void beqz(Register Rs, const address &dest); 
-+ void bnez(Register Rs, const address &dest); -+ void blez(Register Rs, const address &dest); -+ void bgez(Register Rs, const address &dest); -+ void bltz(Register Rs, const address &dest); -+ void bgtz(Register Rs, const address &dest); -+ void la(Register Rd, Label &label); -+ void la(Register Rd, const address &dest); -+ void la(Register Rd, const Address &adr); -+ //label -+ void beqz(Register Rs, Label &l, bool is_far = false); -+ void bnez(Register Rs, Label &l, bool is_far = false); -+ void blez(Register Rs, Label &l, bool is_far = false); -+ void bgez(Register Rs, Label &l, bool is_far = false); -+ void bltz(Register Rs, Label &l, bool is_far = false); -+ void bgtz(Register Rs, Label &l, bool is_far = false); -+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); -+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); + -+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } -+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } -+ void push_reg(Register Rs); -+ void pop_reg(Register Rd); -+ int push_reg(unsigned int bitset, Register stack); -+ int pop_reg(unsigned int bitset, Register stack); -+ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -+ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } -+#ifdef COMPILER2 -+ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -+ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } -+#endif // COMPILER2 ++ check_verified_entry_alignment(entry, verified_entry); + -+ // Push and pop everything that might be clobbered by a native -+ // runtime call except t0 and t1. (They are always -+ // temporary registers, so we don't have to protect them.) -+ // Additional registers can be excluded in a passed RegSet. 
-+ void push_call_clobbered_registers_except(RegSet exclude); -+ void pop_call_clobbered_registers_except(RegSet exclude); ++ // Patch this nmethod atomically. ++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M + -+ void push_call_clobbered_registers() { -+ push_call_clobbered_registers_except(RegSet()); -+ } -+ void pop_call_clobbered_registers() { -+ pop_call_clobbered_registers_except(RegSet()); ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. ++ NativeIllegalInstruction::insert(verified_entry); + } + -+ void pusha(); -+ void popa(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ ICache::invalidate_range(verified_entry, instruction_size); ++} + -+ // if heap base register is used - reinit it with the correct value -+ void reinit_heapbase(); ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); + -+ void bind(Label& L) { -+ Assembler::bind(L); -+ // fences across basic blocks should not be merged -+ code()->clear_last_insn(); -+ } ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr + -+ // mv -+ template::value)> -+ inline void mv(Register Rd, T o) { -+ li(Rd, (int64_t)o); -+ } ++ ICache::invalidate_range(code_pos, instruction_size); ++} + -+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++// MT-safe patching of a long jump instruction. 
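[Not part of the patch] patch_verified_entry above overwrites the 4-byte-aligned first instruction of the verified entry with "jal x0, offset" when the handler stub is in range (a single aligned 32-bit store, so running threads see either the old or the new word), and falls back to the illegal-instruction marker otherwise. A standalone sketch of the same J-type immediate scrambling (imm[20|10:1|11|19:12] into instruction bits 31..12), matching the Assembler::patch calls above; the function name is illustrative only.

#include <cassert>
#include <cstdint>

// Sketch only: build "jal x0, offset". offset must be even and within +/-1 MiB.
static uint32_t encode_jal_x0(int64_t offset) {
  assert((offset & 1) == 0 &&
         offset >= -(INT64_C(1) << 20) && offset < (INT64_C(1) << 20));
  uint32_t imm  = (uint32_t)(offset & 0x1fffff);  // low 21 bits, two's complement
  uint32_t insn = 0b1101111u;                     // JAL opcode; rd (bits 11..7) stays x0
  insn |= ((imm >> 20) & 0x1)   << 31;            // imm[20]    -> bit 31
  insn |= ((imm >> 1)  & 0x3ff) << 21;            // imm[10:1]  -> bits 30..21
  insn |= ((imm >> 11) & 0x1)   << 20;            // imm[11]    -> bit 20
  insn |= ((imm >> 12) & 0xff)  << 12;            // imm[19:12] -> bits 19..12
  return insn;                                    // caller still flushes the icache
}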
++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} + -+ void mv(Register Rd, Address dest); -+ void mv(Register Rd, address addr); -+ void mv(Register Rd, RegisterOrConstant src); + -+ // logic -+ void andrw(Register Rd, Register Rs1, Register Rs2); -+ void orrw(Register Rd, Register Rs1, Register Rs2); -+ void xorrw(Register Rd, Register Rs1, Register Rs2); ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); ++} + -+ // revb -+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend -+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend -+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend -+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend -+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower -+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword -+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word -+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); ++} + -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); -+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = uint_at(0); + -+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); -+ void cmpxchg(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool = false); -+ void cmpxchg_weak(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result); -+ void cmpxchg_narrow_value_helper(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Register tmp1, Register tmp2, Register tmp3); -+ void cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, bool result_as_bool, -+ Register tmp1, Register tmp2, Register tmp3); -+ void weak_cmpxchg_narrow_value(Register addr, Register expected, -+ Register new_val, -+ enum operand_size size, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, -+ Register result, -+ Register tmp1, Register tmp2, Register tmp3); ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); + -+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); -+ void 
atomic_addal(Register prev, RegisterOrConstant incr, Register addr); -+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++} + -+ void atomic_xchg(Register prev, Register newv, Register addr); -+ void atomic_xchgw(Register prev, Register newv, Register addr); -+ void atomic_xchgal(Register prev, Register newv, Register addr); -+ void atomic_xchgalw(Register prev, Register newv, Register addr); -+ void atomic_xchgwu(Register prev, Register newv, Register addr); -+ void atomic_xchgalwu(Register prev, Register newv, Register addr); -+ -+ static bool far_branches() { -+ return ReservedCodeCacheSize > branch_range; -+ } -+ -+ // Jumps that can reach anywhere in the code cache. -+ // Trashes tmp. -+ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); -+ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); -+ -+ static int far_branch_size() { -+ if (far_branches()) { -+ return 2 * 4; // auipc + jalr, see far_call() & far_jump() -+ } else { -+ return 4; -+ } -+ } -+ -+ void load_byte_map_base(Register reg); -+ -+ void bang_stack_with_offset(int offset) { -+ // stack grows down, caller passes positive offset -+ assert(offset > 0, "must bang with negative offset"); -+ sub(t0, sp, offset); -+ sd(zr, Address(t0)); -+ } -+ -+ void la_patchable(Register reg1, const Address &dest, int32_t &offset); -+ -+ virtual void _call_Unimplemented(address call_site) { -+ mv(t1, call_site); -+ } -+ -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) -+ -+ // Frame creation and destruction shared between JITs. -+ void build_frame(int framesize); -+ void remove_frame(int framesize); -+ -+ void reserved_stack_check(); -+ -+ void get_polling_page(Register dest, relocInfo::relocType rtype); -+ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ -+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); -+ address ic_call(address entry, jint method_index = 0); -+ -+ void add_memory_int64(const Address dst, int64_t imm); -+ void add_memory_int32(const Address dst, int32_t imm); -+ -+ void cmpptr(Register src1, Address src2, Label& equal); -+ -+ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -+ void load_method_holder_cld(Register result, Register method); -+ void load_method_holder(Register holder, Register method); -+ -+ void compute_index(Register str1, Register trailing_zeros, Register match_mask, -+ Register result, Register char_tmp, Register tmp, -+ bool haystack_isL); -+ void compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2); -+ -+#ifdef COMPILER2 -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp); -+ void cad(Register dst, Register src1, Register src2, Register carry); -+ void cadc(Register dst, Register src1, Register src2, Register carry); -+ void adc(Register dst, Register src1, Register src2, Register carry); -+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry); -+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, 
Register z, -+ Register carry, Register product, -+ Register idx, Register kdx); -+ void multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi); -+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi); -+#endif -+ -+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ -+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ -+ void zero_words(Register base, u_int64_t cnt); -+ address zero_words(Register ptr, Register cnt); -+ void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp); -+ -+ // shift left by shamt and add -+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); -+ -+ // Here the float instructions with safe deal with some exceptions. -+ // e.g. convert from NaN, +Inf, -Inf to int, float, double -+ // will trigger exception, we need to deal with these situations -+ // to get correct results. -+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); -+ -+ // vector load/store unit-stride instructions -+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vle64_v(vd, base, vm); -+ break; -+ case Assembler::e32: -+ vle32_v(vd, base, vm); -+ break; -+ case Assembler::e16: -+ vle16_v(vd, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vle8_v(vd, base, vm); -+ break; -+ } -+ } -+ -+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { -+ switch (sew) { -+ case Assembler::e64: -+ vse64_v(store_data, base, vm); -+ break; -+ case Assembler::e32: -+ vse32_v(store_data, base, vm); -+ break; -+ case Assembler::e16: -+ vse16_v(store_data, base, vm); -+ break; -+ case Assembler::e8: // fall through -+ default: -+ vse8_v(store_data, base, vm); -+ break; -+ } -+ } -+ -+ static const int zero_words_block_size; -+ -+ void cast_primitive_type(BasicType type, Register Rt) { -+ switch (type) { -+ case T_BOOLEAN: -+ sltu(Rt, zr, Rt); -+ break; -+ case T_CHAR : -+ zero_extend(Rt, Rt, 16); -+ break; -+ case T_BYTE : -+ sign_extend(Rt, Rt, 8); -+ break; -+ case T_SHORT : -+ sign_extend(Rt, Rt, 16); -+ break; -+ case T_INT : -+ addw(Rt, Rt, zr); -+ break; -+ case T_LONG : /* nothing to do */ break; -+ case T_VOID : /* nothing to do */ break; -+ case T_FLOAT : /* nothing to do */ break; -+ case T_DOUBLE : /* nothing to do */ break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ -+ // float cmp with unordered_result -+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); -+ -+ // Zero/Sign-extend -+ void zero_extend(Register dst, Register src, int bits); -+ 
void sign_extend(Register dst, Register src, int bits); -+ -+ // compare src1 and src2 and get -1/0/1 in dst. -+ // if [src1 > src2], dst = 1; -+ // if [src1 == src2], dst = 0; -+ // if [src1 < src2], dst = -1; -+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); -+ -+ int push_fp(unsigned int bitset, Register stack); -+ int pop_fp(unsigned int bitset, Register stack); -+ -+ int push_vp(unsigned int bitset, Register stack); -+ int pop_vp(unsigned int bitset, Register stack); -+ -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); -+ -+private: -+ -+#ifdef ASSERT -+ // Template short-hand support to clean-up after a failed call to trampoline -+ // call generation (see trampoline_call() below), when a set of Labels must -+ // be reset (before returning). -+ template -+ void reset_labels(Label& lbl, More&... more) { -+ lbl.reset(); reset_labels(more...); -+ } -+ template -+ void reset_labels(Label& lbl) { -+ lbl.reset(); -+ } -+#endif -+ void repne_scan(Register addr, Register value, Register count, Register tmp); -+ -+ // Return true if an address is within the 48-bit RISCV64 address space. -+ bool is_valid_riscv64_address(address addr) { -+ return ((uintptr_t)addr >> 48) == 0; -+ } -+ -+ void ld_constant(Register dest, const Address &const_addr) { -+ if (NearCpool) { -+ ld(dest, const_addr); -+ } else { -+ int32_t offset = 0; -+ la_patchable(dest, InternalAddress(const_addr.target()), offset); -+ ld(dest, Address(dest, offset)); -+ } -+ } -+ -+ int bitset_to_regs(unsigned int bitset, unsigned char* regs); -+ Address add_memory_helper(const Address dst); -+ -+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); -+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); -+ -+ // Check the current thread doesn't need a cross modify fence. -+ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; -+}; -+ -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. 
-+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); + -+ public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); -+}; ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; ++} +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp new file mode 100644 -index 00000000000..ef968ccd96d +index 0000000000..183ab85fc9 --- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,520 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27434,663 +26357,558 @@ index 00000000000..ef968ccd96d + * + */ + -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -+ -+// Still empty. ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP + -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -new file mode 100644 -index 00000000000..23a75d20502 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -0,0 +1,169 @@ -+/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" + -+#ifndef CPU_RISCV_MATCHER_RISCV_HPP -+#define CPU_RISCV_MATCHER_RISCV_HPP ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar + -+ // Defined within class Matcher ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. + -+ // false => size gets scaled to BytesPerLong, ok. -+ static const bool init_array_count_is_in_bytes = false; ++class NativeCall; + -+ // Whether this platform implements the scalable vector feature -+ static const bool implements_scalable_vector = true; ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4, ++ compressed_instruction_size = 2, ++ }; + -+ static const bool supports_scalable_vector() { -+ return UseRVV; ++ juint encoding() const { ++ return uint_at(0); + } + -+ // riscv supports misaligned vectors store/load. -+ static constexpr bool misaligned_vectors_ok() { -+ return true; -+ } ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } + -+ // Whether code generation need accurate ConvI2L types. -+ static const bool convi2l_type_required = false; ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field ++ } + -+ // Does the CPU require late expand (see block.cpp for description of late 
expand)? -+ static const bool require_postalloc_expand = false; ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); + -+ // Do we need to mask the count passed to shift instructions or does -+ // the cpu only look at the lower 5/6 bits anyway? -+ static const bool need_masked_shift_count = false; ++ // the instruction sequence of movptr is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); ++ } + -+ // No support for generic vector operands. -+ static const bool supports_generic_vector_operands = false; ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; + -+ static constexpr bool isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); + } + -+ // Use conditional move (CMOVL) -+ static constexpr int long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+ } ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; + -+ static constexpr int float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; ++ return extract_rs1(last_instr) == extract_rd(auipc); + } + -+ // This affects two different things: -+ // - how Decode nodes are matched -+ // - how ImplicitNullCheck opportunities are recognized -+ // If true, the matcher will try to remove all Decodes and match them -+ // (as operands) into nodes. NullChecks are not prepared to deal with -+ // Decodes by final_graph_reshaping(). -+ // If false, final_graph_reshaping() forces the decode behind the Cmp -+ // for a NullCheck. The matcher matches the Decode node into a register. -+ // Implicit_null_check optimization moves the Decode along with the -+ // memory operation back up before the NullCheck. 
-+ static bool narrow_oop_use_complex_address() { -+ return CompressedOops::shift() == 0; ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address load = auipc + instruction_size; ++ ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + -+ static bool narrow_klass_use_complex_address() { ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); ++ ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } + return false; + } ++ static bool is_lwu_to_zr(address instr); + -+ static bool const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return CompressedOops::base() == NULL; -+ } ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + -+ static bool const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return CompressedKlassPointers::base() == NULL; -+ } ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } + -+ // Is it better to copy float constants, or load them directly from -+ // memory? Intel can load a float constant from a direct address, -+ // requiring no extra registers. Most RISCs will have to materialize -+ // an address into a register first, so they would do better to copy -+ // the constant from stack. -+ static const bool rematerialize_float_constants = false; ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + -+ // If CPU can load and store mis-aligned doubles directly then no -+ // fixup is needed. Else we split the double into 2 integer pieces -+ // and move it piece-by-piece. Only happens when passing doubles into -+ // C code as the Java calling convention forces doubles to be aligned. -+ static const bool misaligned_doubles_ok = true; ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } + -+ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -+ static const bool strict_fp_requires_explicit_rounding = false; ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + -+ // Are floats converted to double when stored to stack during -+ // deoptimization? -+ static constexpr bool float_in_double() { return false; } + -+ // Do ints take an entire long register or just half? -+ // The relevant question is how the int is callee-saved: -+ // the whole long is written but de-opt'ing will have to extract -+ // the relevant 32 bits. -+ static const bool int_in_long = true; ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + -+ // Does the CPU supports vector variable shift instructions? -+ static constexpr bool supports_vector_variable_shifts(void) { -+ return false; -+ } ++ public: + -+ // Does the CPU supports vector variable rotate instructions? 
-+ static constexpr bool supports_vector_variable_rotates(void) { -+ return false; -+ } ++ inline friend NativeInstruction* nativeInstruction_at(address addr); + -+ // Does the CPU supports vector constant rotate instructions? -+ static constexpr bool supports_vector_constant_rotates(int shift) { -+ return false; ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); + } + -+ // Does the CPU supports vector unsigned comparison instructions? -+ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -+ return false; ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; + } ++}; + -+ // Some microarchitectures have mask registers used on vectors -+ static const bool has_predicated_vectors(void) { -+ return false; -+ } ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; ++} + -+ // true means we have fast l2f convers -+ // false means that conversion is done by runtime call -+ static constexpr bool convL2FSupported(void) { -+ return true; ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; ++} ++ ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). ++ ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; ++ ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); + } + -+ // Implements a variant of EncodeISOArrayNode that encode ASCII only -+ static const bool supports_encode_ascii_array = false; ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); + -+ // Returns pre-selection estimated size of a vector operation. 
-+ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -+ return 0; ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); + } + -+#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -new file mode 100644 -index 00000000000..1f7c0c87c21 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,461 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "classfile/javaClasses.inline.hpp" -+#include "classfile/vmClasses.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "memory/allocation.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/flags/flagSetting.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" ++ static void replace_mt_safe(address instr_addr, address code_buffer); + -+#define __ _masm-> ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ // The parameter assert_lock disables the assertion during code generation. 
++ void set_destination_mt_safe(address dest, bool assert_lock = true); + -+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { -+ assert_cond(_masm != NULL); -+ if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ "MH argument is a Class"); -+ } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); -+} ++ address get_trampoline(); ++}; + -+#ifdef ASSERT -+static int check_nonzero(const char* xname, int x) { -+ assert(x != 0, "%s should be nonzero", xname); -+ return x; ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; +} -+#define NONZERO(x) check_nonzero(#x, x) -+#else //ASSERT -+#define NONZERO(x) (x) -+#endif //PRODUCT + -+#ifdef ASSERT -+void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message) { -+ assert_cond(_masm != NULL); -+ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -+ Klass* klass = vmClasses::klass_at(klass_id); -+ Register temp = t1; -+ Register temp2 = t0; // used by MacroAssembler::cmpptr -+ Label L_ok, L_bad; -+ BLOCK_COMMENT("verify_klass {"); -+ __ verify_oop(obj); -+ __ beqz(obj, L_bad); -+ __ push_reg(RegSet::of(temp, temp2), sp); -+ __ load_klass(temp, obj); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ intptr_t super_check_offset = klass->super_check_offset(); -+ __ ld(temp, Address(temp, super_check_offset)); -+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ __ bind(L_bad); -+ __ stop(error_message); -+ __ BIND(L_ok); -+ __ pop_reg(RegSet::of(temp, temp2), sp); -+ BLOCK_COMMENT("} verify_klass"); ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++ DEBUG_ONLY(call->verify()); ++ return call; +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} -+ -+#endif //ASSERT -+ -+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(method == xmethod, "interpreter calling convention"); -+ Label L_no_such_method; -+ __ beqz(xmethod, L_no_such_method); -+ __ verify_method_ptr(method); -+ -+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { -+ Label run_compiled_code; -+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running -+ // compiled code in threads for which the event is enabled. Check here for -+ // interp_only_mode if these events CAN be enabled. ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). 
++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; + -+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); -+ __ beqz(t0, run_compiled_code); -+ __ ld(t0, Address(method, Method::interpreter_entry_offset())); -+ __ jr(t0); -+ __ BIND(run_compiled_code); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). ++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); ++ } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); ++ } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; + } + -+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : -+ Method::from_interpreted_offset(); -+ __ ld(t0,Address(method, entry_offset)); -+ __ jr(t0); -+ __ bind(L_no_such_method); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); -+} ++ intptr_t data() const; ++ void set_data(intptr_t x); + -+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ BLOCK_COMMENT("jump_to_lambda_form {"); -+ // This is the initial entry point of a lazy method handle. -+ // After type checking, it picks up the invoker from the LambdaForm. 
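++// Illustrative sketch (not part of the upstream patch): next_instruction_address
++// above peeks at the sixth 4-byte slot to tell a full 6-instruction movptr
++// (lui, addi, slli, addi, slli, addi) from the 5-instruction form whose low bits
++// are folded into a following addi/load/jalr. A simplified standalone version of
++// that length probe over raw instruction words; movptr_sequence_size and is_addi
++// are illustrative names, and the sketch assumes the sequence is already known
++// to start with a movptr-style lui.
++//
++// #include <cstdint>
++// #include <cstddef>
++//
++// static bool is_addi(uint32_t insn) {
++//   return (insn & 0x7f) == 0b0010011 && ((insn >> 12) & 0x7) == 0;  // opcode + funct3
++// }
++//
++// static size_t movptr_sequence_size(const uint32_t* insns) {
++//   const size_t kInsnSize = 4;
++//   return is_addi(insns[5]) ? 6 * kInsnSize   // lui, addi, slli, addi, slli, addi
++//                            : 5 * kInsnSize;  // low part carried by the next insn
++// }
++//
++// int main() {
++//   uint32_t with_addi[6] = {0, 0, 0, 0, 0, 0x00000013};  // slot 5 is addi x0, x0, 0
++//   uint32_t with_jalr[6] = {0, 0, 0, 0, 0, 0x00028067};  // slot 5 is jalr x0, 0(x5)
++//   return (movptr_sequence_size(with_addi) == 24 &&
++//           movptr_sequence_size(with_jalr) == 20) ? 0 : 1;
++// }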
-+ assert_different_registers(recv, method_temp, temp2); -+ assert(recv != noreg, "required register"); -+ assert(method_temp == xmethod, "required register for loading method"); ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } ++ } + -+ // Load the invoker, as MH -> MH.form -> LF.vmentry -+ __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ void verify(); ++ void print(); + -+ if (VerifyMethodHandles && !for_compiler_entry) { -+ // make sure recv is already on stack -+ __ ld(temp2, Address(method_temp, Method::const_offset())); -+ __ load_sized_value(temp2, -+ Address(temp2, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ Label L; -+ __ ld(t0, __ argument_address(temp2, -1)); -+ __ beq(recv, t0, L); -+ __ ld(x10, __ argument_address(temp2, -1)); -+ __ ebreak(); -+ __ BIND(L); -+ } ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); ++}; + -+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); -+ BLOCK_COMMENT("} jump_to_lambda_form"); ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; +} + -+// Code generation -+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, -+ vmIntrinsics::ID iid) { -+ assert_cond(_masm != NULL); -+ const bool not_for_compiler_entry = false; // this is the interpreter entry -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ if (iid == vmIntrinsics::_invokeGeneric || -+ iid == vmIntrinsics::_compiledLambdaForm) { -+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. -+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. -+ // They all allow an appendix argument. -+ __ ebreak(); // empty stubs make SG sick -+ return NULL; -+ } ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; ++} + -+ // No need in interpreter entry for linkToNative for now. -+ // Interpreter calls compiled entry through i2c. -+ if (iid == vmIntrinsics::_linkToNative) { -+ __ ebreak(); -+ return NULL; -+ } ++// RISCV should not use C1 runtime patching, but still implement ++// NativeMovRegMem to keep some compilers happy. 
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) -+ // xmethod: Method* -+ // x13: argument locator (parameter slot count, added to sp) -+ // x11: used as temp to hold mh or receiver -+ // x10, x29: garbage temps, blown away -+ Register argp = x13; // argument list ptr, live on error paths -+ Register mh = x11; // MH receiver; dies quickly and is recycled ++ int instruction_start() const { return instruction_offset; } + -+ // here's where control starts out: -+ __ align(CodeEntryAlignment); -+ address entry_point = __ pc(); ++ address instruction_address() const { return addr_at(instruction_offset); } + -+ if (VerifyMethodHandles) { -+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + -+ Label L; -+ BLOCK_COMMENT("verify_intrinsic_id {"); -+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); -+ __ mv(t1, (int) iid); -+ __ beq(t0, t1, L); -+ if (iid == vmIntrinsics::_linkToVirtual || -+ iid == vmIntrinsics::_linkToSpecial) { -+ // could do this for all kinds, but would explode assembly code size -+ trace_method_handle(_masm, "bad Method*::intrinsic_id"); -+ } -+ __ ebreak(); -+ __ bind(L); -+ BLOCK_COMMENT("} verify_intrinsic_id"); -+ } ++ int offset() const; + -+ // First task: Find out how big the argument list is. -+ Address x13_first_arg_addr; -+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); -+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); -+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ __ ld(argp, Address(xmethod, Method::const_offset())); -+ __ load_sized_value(argp, -+ Address(argp, ConstMethod::size_of_parameters_offset()), -+ sizeof(u2), /*is_signed*/ false); -+ x13_first_arg_addr = __ argument_address(argp, -1); -+ } else { -+ DEBUG_ONLY(argp = noreg); -+ } ++ void set_offset(int x); + -+ if (!is_signature_polymorphic_static(iid)) { -+ __ ld(mh, x13_first_arg_addr); -+ DEBUG_ONLY(argp = noreg); ++ void add_offset_in_bytes(int add_offset) { ++ set_offset(offset() + add_offset); + } + -+ // x13_first_arg_addr is live! ++ void verify(); ++ void print(); + -+ trace_method_handle_interpreter_entry(_masm, iid); -+ if (iid == vmIntrinsics::_invokeBasic) { -+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ } else { -+ // Adjust argument list by popping the trailing MemberName argument. -+ Register recv = noreg; -+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { -+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
-+ __ ld(recv = x12, x13_first_arg_addr); -+ } -+ DEBUG_ONLY(argp = noreg); -+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now -+ __ pop_reg(xmember); // extract last argument -+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); -+ } ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at(address addr); ++}; + -+ return entry_point; ++inline NativeMovRegMem* nativeMovRegMem_at(address addr) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(addr - NativeMovRegMem::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; +} + ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, -+ vmIntrinsics::ID iid, -+ Register receiver_reg, -+ Register member_reg, -+ bool for_compiler_entry) { -+ assert_cond(_masm != NULL); -+ assert(is_signature_polymorphic(iid), "expected invoke iid"); -+ // temps used in this code are not used in *either* compiled or interpreted calling sequences -+ Register temp1 = x7; -+ Register temp2 = x28; -+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP -+ if (for_compiler_entry) { -+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); -+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); -+ } ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ void set_jump_destination(address dest); + -+ assert_different_registers(temp1, temp2, temp3, receiver_reg); -+ assert_different_registers(temp1, temp2, temp3, member_reg); ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); + -+ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ if (iid == vmIntrinsics::_linkToNative) { -+ assert(for_compiler_entry, "only compiler entry is supported"); -+ } -+ // indirect through MH.form.vmentry.vmtarget -+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); -+ } else { -+ // The method is a member invoker used by direct method handles. 
-+ if (VerifyMethodHandles) { -+ // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ "MemberName required for invokeVirtual etc."); -+ } ++ void verify(); + -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; + -+ Register temp1_recv_klass = temp1; -+ if (iid != vmIntrinsics::_linkToStatic) { -+ __ verify_oop(receiver_reg); -+ if (iid == vmIntrinsics::_linkToSpecial) { -+ // Don't actually load the klass; just null-check the receiver. -+ __ null_check(receiver_reg); -+ } else { -+ // load receiver klass itself -+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ BLOCK_COMMENT("check_receiver {"); -+ // The receiver for the MemberName must be in receiver_reg. -+ // Check the receiver against the MemberName.clazz -+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { -+ // Did not load it above... -+ __ load_klass(temp1_recv_klass, receiver_reg); -+ __ verify_klass_ptr(temp1_recv_klass); -+ } -+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { -+ Label L_ok; -+ Register temp2_defc = temp2; -+ __ load_heap_oop(temp2_defc, member_clazz, temp3); -+ load_klass_from_Class(_masm, temp2_defc); -+ __ verify_klass_ptr(temp2_defc); -+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); -+ // If we get here, the type check failed! -+ __ ebreak(); -+ __ bind(L_ok); -+ } -+ BLOCK_COMMENT("} check_receiver"); -+ } -+ if (iid == vmIntrinsics::_linkToSpecial || -+ iid == vmIntrinsics::_linkToStatic) { -+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass -+ } ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++ DEBUG_ONLY(jump->verify()); ++ return jump; ++} + -+ // Live registers at this point: -+ // member_reg - MemberName that was the trailing argument -+ // temp1_recv_klass - klass of stacked receiver, if needed -+ // x30 - interpreter linkage (if interpreted) -+ // x11 ... 
x10 - compiler arguments (if compiled) ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; + -+ Label L_incompatible_class_change_error; -+ switch (iid) { -+ case vmIntrinsics::_linkToSpecial: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ address jump_destination() const; + -+ case vmIntrinsics::_linkToStatic: -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); -+ } -+ __ load_heap_oop(xmethod, member_vmtarget); -+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); -+ break; ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; + -+ case vmIntrinsics::_linkToVirtual: -+ { -+ // same as TemplateTable::invokevirtual, -+ // minus the CP setup and profiling: ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} + -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); -+ } ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; + -+ // pick out the vtable index from the MemberName, and then we can discard it: -+ Register temp2_index = temp2; -+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} + -+ if (VerifyMethodHandles) { -+ Label L_index_ok; -+ __ bgez(temp2_index, L_index_ok); -+ __ ebreak(); -+ __ BIND(L_index_ok); -+ } ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} + -+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget -+ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: + -+ // get target Method* & entry point -+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); -+ break; -+ } ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. 
++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; + -+ case vmIntrinsics::_linkToInterface: -+ { -+ // same as TemplateTable::invokeinterface -+ // (minus the CP setup and profiling, with different argument motion) -+ if (VerifyMethodHandles) { -+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); -+ } ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; + -+ Register temp3_intf = temp3; -+ __ load_heap_oop(temp3_intf, member_clazz); -+ load_klass_from_Class(_masm, temp3_intf); -+ __ verify_klass_ptr(temp3_intf); ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: + -+ Register rindex = xmethod; -+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); -+ if (VerifyMethodHandles) { -+ Label L; -+ __ bgez(rindex, L); -+ __ ebreak(); -+ __ bind(L); -+ } ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; ++ } ++ return false; ++} + -+ // given intf, index, and recv klass, dispatch to the implementation method -+ __ lookup_interface_method(temp1_recv_klass, temp3_intf, -+ // note: next two args must be the same: -+ rindex, xmethod, -+ temp2, -+ L_incompatible_class_change_error); -+ break; -+ } ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} + -+ default: -+ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ break; -+ } ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; + -+ // live at this point: xmethod, x30 (if interpreted) ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} + -+ // After figuring out which concrete method to call, jump into it. -+ // Note that this works in the interpreter with no data motion. -+ // But the compiled version will require that r2_recv be shifted out. 
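++// Illustrative sketch (not part of the upstream patch): the trampoline stub
++// recognized above is three instructions plus a 64-bit literal:
++//   auipc t0, 0        ; t0 = pc of the stub
++//   ld    t0, 12(t0)   ; load the target address stored after the code
++//   jalr  x0, 0(t0)    ; jump through it
++// A standalone check over raw encodings, hard-coding t0 == x5 and the 12-byte
++// data_offset; fld and looks_like_trampoline_stub are illustrative names.
++//
++// #include <cstdint>
++// #include <cassert>
++//
++// static uint32_t fld(uint32_t insn, int hi, int lo) {
++//   return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
++// }
++//
++// static bool looks_like_trampoline_stub(const uint32_t insn[3]) {
++//   const uint32_t kT0 = 5;
++//   bool is_auipc = fld(insn[0], 6, 0) == 0b0010111;
++//   bool is_ld    = fld(insn[1], 6, 0) == 0b0000011 && fld(insn[1], 14, 12) == 0b011;
++//   bool is_jalr  = fld(insn[2], 6, 0) == 0b1100111;
++//   return is_auipc && is_ld && is_jalr &&
++//          fld(insn[0], 11, 7)  == kT0 &&   // auipc writes t0
++//          fld(insn[1], 11, 7)  == kT0 &&   // ld writes t0 ...
++//          fld(insn[1], 19, 15) == kT0 &&   // ... and reads t0 as base
++//          fld(insn[2], 19, 15) == kT0 &&   // jalr jumps through t0
++//          fld(insn[1], 31, 20) == 12;      // ld offset == data_offset
++// }
++//
++// int main() {
++//   const uint32_t stub[3] = {
++//     0x00000297,   // auipc x5, 0
++//     0x00C2B283,   // ld    x5, 12(x5)
++//     0x00028067    // jalr  x0, 0(x5)
++//   };
++//   assert(looks_like_trampoline_stub(stub));
++//   return 0;
++// }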
-+ __ verify_method_ptr(xmethod); -+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); -+ if (iid == vmIntrinsics::_linkToInterface) { -+ __ bind(L_incompatible_class_change_error); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); -+ } -+ } ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 0000000000..fef8ca9b64 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+} ++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP ++#define CPU_RISCV_REGISTERMAP_RISCV_HPP + -+#ifndef PRODUCT -+void trace_method_handle_stub(const char* adaptername, -+ oopDesc* mh, -+ intptr_t* saved_regs, -+ intptr_t* entry_sp) { } ++// machine-dependent implemention for register maps ++ friend class frame; + -+// The stub wraps the arguments in a struct on the stack to avoid -+// dealing with the different calling conventions for passing 6 -+// arguments. -+struct MethodHandleStubArguments { -+ const char* adaptername; -+ oopDesc* mh; -+ intptr_t* saved_regs; -+ intptr_t* entry_sp; -+}; -+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ address pd_location(VMReg reg) const { return NULL; } + -+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } -+#endif //PRODUCT -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp new file mode 100644 -index 00000000000..f73aba29d67 +index 0000000000..583f67573c --- /dev/null -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,57 @@ ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ +/* -+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -28113,47 +26931,180 @@ index 00000000000..f73aba29d67 + * + */ + -+// Platform-specific definitions for method handles. -+// These definitions are inlined into class MethodHandles. ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" + -+// Adapters -+enum /* platform_dependent_constants */ { -+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) -+}; ++REGISTER_DEFINITION(Register, noreg); + -+public: ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); + -+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++REGISTER_DEFINITION(FloatRegister, fnoreg); + -+ static void verify_klass(MacroAssembler* _masm, -+ Register obj, vmClassID klass_id, -+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); 
++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); + -+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ "reference is a MH"); -+ } ++REGISTER_DEFINITION(VectorRegister, vnoreg); + -+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); ++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); + -+ // Similar to InterpreterMacroAssembler::jump_from_interpreted. -+ // Takes care of special dispatch from single stepping too. 
-+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, -+ bool for_compiler_entry); ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); + -+ static void jump_to_lambda_form(MacroAssembler* _masm, -+ Register recv, Register method_temp, -+ Register temp2, -+ bool for_compiler_entry); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); ++ ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); ++ ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 00000000000..0a05c577860 +index 0000000000..ef60cb3bb0 --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,429 @@ ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -28178,417 +27129,440 @@ index 00000000000..0a05c577860 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/compiledIC.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/handles.hpp" -+#include "runtime/orderAccess.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "utilities/ostream.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif ++#include "register_riscv.hpp" + -+Register NativeInstruction::extract_rs1(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); -+} ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + -+Register NativeInstruction::extract_rs2(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); -+} ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+Register NativeInstruction::extract_rd(address instr) { -+ assert_cond(instr != NULL); -+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++const char* RegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; +} + -+uint32_t NativeInstruction::extract_opcode(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++const char* FloatRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; +} + -+uint32_t NativeInstruction::extract_funct3(address instr) { -+ assert_cond(instr != NULL); -+ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++const char* VectorRegisterImpl::name() const { ++ static const char *const names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; +} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +new file mode 100644 +index 0000000000..f64a06eb89 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,381 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+bool NativeInstruction::is_pc_relative_at(address instr) { -+ // auipc + jalr -+ // auipc + addi -+ // auipc + load -+ // auipc + fload_load -+ return (is_auipc_at(instr)) && -+ (is_addi_at(instr + instruction_size) || -+ is_jalr_at(instr + instruction_size) || -+ is_load_at(instr + instruction_size) || -+ is_float_load_at(instr + instruction_size)) && -+ check_pc_relative_data_dependency(instr); -+} ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP + -+// ie:ld(Rd, Label) -+bool NativeInstruction::is_load_pc_relative_at(address instr) { -+ return is_auipc_at(instr) && // auipc -+ is_ld_at(instr + instruction_size) && // ld -+ check_load_pc_relative_data_dependency(instr); -+} ++#include "asm/register.hpp" + -+bool NativeInstruction::is_movptr_at(address instr) { -+ return is_lui_at(instr) && // Lui -+ is_addi_at(instr + instruction_size) && // Addi -+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 -+ is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ (is_addi_at(instr + instruction_size * 5) || -+ is_jalr_at(instr + instruction_size * 5) || -+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load -+ check_movptr_data_dependency(instr); -+} ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. 
+ -+bool NativeInstruction::is_li32_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addiw_at(instr + instruction_size) && // addiw -+ check_li32_data_dependency(instr); -+} ++class VMRegImpl; ++typedef VMRegImpl* VMReg; + -+bool NativeInstruction::is_li64_at(address instr) { -+ return is_lui_at(instr) && // lui -+ is_addi_at(instr + instruction_size) && // addi -+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 3) && // addi -+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 -+ is_addi_at(instr + instruction_size * 5) && // addi -+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 -+ is_addi_at(instr + instruction_size * 7) && // addi -+ check_li64_data_dependency(instr); -+} ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; + -+void NativeCall::verify() { -+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; +} + -+address NativeCall::destination() const { -+ address addr = (address)this; -+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); -+ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+ // Do we use a trampoline stub for this call? -+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. -+ assert(cb && cb->is_nmethod(), "sanity"); -+ nmethod *nm = (nmethod *)cb; -+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { -+ // Yes we do, so get the destination from the trampoline stub. -+ const address trampoline_stub_addr = destination; -+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); -+ } ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+ return destination; -+} ++ // derived registers, offsets, and addresses ++ const Register successor() const { return as_Register(encoding() + 1); } + -+// Similar to replace_mt_safe, but just changes the destination. The -+// important thing is that free-running threads are able to execute this -+// call instruction at all times. -+// -+// Used in the runtime linkage of calls; see class CompiledIC. -+// -+// Add parameter assert_lock to switch off assertion -+// during code generation, where no patching lock is needed. -+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { -+ assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -+ CompiledICLocker::is_safe(addr_at(0)), -+ "concurrent code patching"); ++ // construction ++ inline friend Register as_Register(int encoding); + -+ ResourceMark rm; -+ address addr_call = addr_at(0); -+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ VMReg as_VMReg() const; + -+ // Patch the constant in the call's trampoline stub. 
-+ address trampoline_stub_addr = get_trampoline(); -+ if (trampoline_stub_addr != NULL) { -+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); -+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); -+ } ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+ // Patch the call. -+ if (Assembler::reachable_from_branch_at(addr_call, dest)) { -+ set_destination(dest); -+ } else { -+ assert (trampoline_stub_addr != NULL, "we need a trampoline"); -+ set_destination(trampoline_stub_addr); ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; + } + -+ ICache::invalidate_range(addr_call, instruction_size); -+} ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+address NativeCall::get_trampoline() { -+ address call_addr = addr_at(0); -+ -+ CodeBlob *code = CodeCache::find_blob(call_addr); -+ assert(code != NULL, "Could not find the containing code blob"); -+ -+ address jal_destination = MacroAssembler::pd_call_destination(call_addr); -+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { -+ return jal_destination; -+ } -+ -+ if (code != NULL && code->is_nmethod()) { -+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; + } + -+ return NULL; -+} -+ -+// Inserts a native call instruction at a given pc -+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } -+ -+//------------------------------------------------------------------- ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + -+void NativeMovConstReg::verify() { -+ if (!(nativeInstruction_at(instruction_address())->is_movptr() || -+ is_auipc_at(instruction_address()))) { -+ fatal("should be MOVPTR or AUIPC"); -+ } -+} ++// The integer registers of the RISCV architecture + -+intptr_t NativeMovConstReg::data() const { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ if (maybe_cpool_ref(instruction_address())) { -+ return *(intptr_t*)addr; -+ } else { -+ return (intptr_t)addr; -+ } -+} ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + -+void NativeMovConstReg::set_data(intptr_t x) { -+ if (maybe_cpool_ref(instruction_address())) { -+ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); -+ *(intptr_t*)addr = x; -+ } else { -+ // Store x into the instruction stream. 
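++// Illustrative sketch (not part of the upstream patch): compressed_encoding()
++// above maps the RVC-eligible range x8..x15 onto the 3-bit register fields used
++// by compressed 16-bit instructions by subtracting the base of 8. A standalone
++// version of that mapping and its validity check; the names below are
++// illustrative, not HotSpot APIs.
++//
++// #include <cassert>
++//
++// static const int kCompressedBase = 8;   // x8 is the first RVC-addressable GPR
++// static const int kCompressedTop  = 15;  // x15 is the last
++//
++// static bool is_compressed_valid(int encoding) {
++//   return encoding >= kCompressedBase && encoding <= kCompressedTop;
++// }
++//
++// static int compressed_encoding(int encoding) {
++//   assert(is_compressed_valid(encoding));
++//   return encoding - kCompressedBase;    // yields the 3-bit field value 0..7
++// }
++//
++// int main() {
++//   assert(compressed_encoding(8)  == 0);   // x8  -> 0b000
++//   assert(compressed_encoding(15) == 7);   // x15 -> 0b111
++//   assert(!is_compressed_valid(5));        // t0/x5 has no compressed form
++//   return 0;
++// }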
-+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); -+ } ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + -+ // Find and replace the oop/metadata corresponding to this -+ // instruction in oops section. -+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } -+ } -+ } -+} ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; + -+void NativeMovConstReg::print() { -+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, -+ p2i(instruction_address()), data()); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; +} + -+//------------------------------------------------------------------- -+ -+int NativeMovRegMem::offset() const { -+ Unimplemented(); -+ return 0; -+} ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2, + -+void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, ++ }; + -+void NativeMovRegMem::verify() { -+ Unimplemented(); -+} ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); + -+//-------------------------------------------------------------------------------- ++ VMReg as_VMReg() const; + -+void NativeJump::verify() { } ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { -+} ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } + ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } + -+address NativeJump::jump_destination() const { -+ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } ++}; + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -+ // i.e. jump to 0 when we need leave space for a wide immediate -+ // load ++// The float registers of the RISCV architecture + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; -+ } ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + -+ return dest; -+}; ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + -+void NativeJump::set_jump_destination(address dest) { -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ if (dest == (address) -1) -+ dest = instruction_address(); ++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef VectorRegisterImpl* VectorRegister; + -+ MacroAssembler::pd_patch_instruction(instruction_address(), dest); -+ ICache::invalidate_range(instruction_address(), instruction_size); ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; +} + -+//------------------------------------------------------------------- ++// The implementation of vector registers for RVV ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; + -+address NativeGeneralJump::jump_destination() const { -+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); -+ address dest = (address) move->data(); ++ // construction ++ inline friend VectorRegister as_VectorRegister(int encoding); + -+ // We use jump to self as the unresolved address which the inline -+ // cache code (and relocs) know about -+ // As a special case we also use jump to 0 when first generating -+ // a general jump ++ VMReg as_VMReg() const; + -+ // return -1 if jump to self or to 0 -+ if ((dest == (address) this) || dest == 0) { -+ dest = (address) -1; -+ } ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + -+ return dest; -+} ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } ++ const char* name() const; + -+//------------------------------------------------------------------- ++}; + -+bool NativeInstruction::is_safepoint_poll() { -+ return is_lwu_to_zr(address(this)); -+} ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + -+bool NativeInstruction::is_lwu_to_zr(address instr) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0000011 && -+ extract_funct3(instr) == 0b110 && -+ extract_rd(instr) == zr); // zr -+} ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); 
++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); + -+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. -+bool NativeInstruction::is_sigill_zombie_not_entrant() { -+ // jvmci -+ return uint_at(0) == 0xffffffff; -+} + -+void NativeIllegalInstruction::insert(address code_pos) { -+ assert_cond(code_pos != NULL); -+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction -+} ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. + -+bool NativeInstruction::is_stop() { -+ return uint_at(0) == 0xffffffff; // an illegal instruction -+} ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) ++ }; + -+//------------------------------------------------------------------- ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; ++}; + -+// MT-safe inserting of a jump over a jump or a nop (used by -+// nmethod::make_not_entrant_or_zombie) ++// A set of registers ++class RegSet { ++ uint32_t _bitset; + -+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ RegSet(uint32_t bitset) : _bitset(bitset) { } + -+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++public: + -+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || -+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), -+ "riscv cannot replace non-jump with jump"); ++ RegSet() : _bitset(0) { } + -+ // Patch this nmethod atomically. 
-+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { -+ ptrdiff_t offset = dest - verified_entry; -+ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++ RegSet(Register r1) : _bitset(r1->bit()) { } + -+ uint32_t insn = 0; -+ address pInsn = (address)&insn; -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump -+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) -+ *(unsigned int*)verified_entry = insn; -+ } else { -+ // We use an illegal instruction for marking a method as -+ // not_entrant or zombie. -+ NativeIllegalInstruction::insert(verified_entry); ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; + } + -+ ICache::invalidate_range(verified_entry, instruction_size); -+} -+ -+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { -+ CodeBuffer cb(code_pos, instruction_size); -+ MacroAssembler a(&cb); -+ -+ int32_t offset = 0; -+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli -+ a.jalr(x0, t0, offset); // jalr ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } + -+ ICache::invalidate_range(code_pos, instruction_size); -+} ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } + -+// MT-safe patching of a long jump instruction. -+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { -+ ShouldNotCallThis(); -+} ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } + ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } + -+address NativeCallTrampolineStub::destination(nmethod *nm) const { -+ return ptr_at(data_offset); -+} ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } + -+void NativeCallTrampolineStub::set_destination(address new_destination) { -+ set_ptr_at(data_offset, new_destination); -+ OrderAccess::release(); -+} ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } + -+uint32_t NativeMembar::get_kind() { -+ uint32_t insn = uint_at(0); ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } + -+ uint32_t predecessor = Assembler::extract(insn, 27, 24); -+ uint32_t successor = Assembler::extract(insn, 23, 20); ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + -+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); -+} ++ return RegSet(bits); ++ } + -+void NativeMembar::set_kind(uint32_t order_kind) { -+ uint32_t predecessor = 0; -+ uint32_t successor = 0; ++ uint32_t bits() const { return _bitset; } + -+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++private: + -+ uint32_t insn = uint_at(0); -+ address pInsn = (address) &insn; -+ Assembler::patch(pInsn, 27, 24, predecessor); -+ Assembler::patch(pInsn, 23, 20, successor); ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? 
as_Register(exact_log2(first)) : noreg; ++ } ++}; + -+ address membar = addr_at(0); -+ *(unsigned int*) membar = insn; -+} -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp new file mode 100644 -index 00000000000..718b2e3de6c +index 0000000000..047ea2276c --- /dev/null -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,572 @@ ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,112 @@ +/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -28612,561 +27586,101 @@ index 00000000000..718b2e3de6c + * + */ + -+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP -+#define CPU_RISCV_NATIVEINST_RISCV_HPP -+ -+#include "asm/assembler.hpp" -+#include "runtime/icache.hpp" -+#include "runtime/os.hpp" -+ -+// We have interfaces for the following instructions: -+// - NativeInstruction -+// - - NativeCall -+// - - NativeMovConstReg -+// - - NativeMovRegMem -+// - - NativeJump -+// - - NativeGeneralJump -+// - - NativeIllegalInstruction -+// - - NativeCallTrampolineStub -+// - - NativeMembar -+// - - NativeFenceI -+ -+// The base class for different kinds of native instruction abstractions. -+// Provides the primitive operations to manipulate code relative to this. -+ -+class NativeCall; -+ -+class NativeInstruction { -+ friend class Relocation; -+ friend bool is_NativeCallTrampolineStub_at(address); -+ public: -+ enum { -+ instruction_size = 4, -+ compressed_instruction_size = 2, -+ }; ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" + -+ juint encoding() const { -+ return uint_at(0); ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; + } + -+ bool is_jal() const { return is_jal_at(addr_at(0)); } -+ bool is_movptr() const { return is_movptr_at(addr_at(0)); } -+ bool is_call() const { return is_call_at(addr_at(0)); } -+ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ int bytes; + -+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } -+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } -+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } -+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } -+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } -+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } -+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } -+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); 
return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } -+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } -+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } -+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } -+ static bool is_slli_shift_at(address instr, uint32_t shift) { -+ assert_cond(instr != NULL); -+ return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation -+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field ++ switch (type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; + } ++ ICache::invalidate_range(addr(), bytes); ++} + -+ static Register extract_rs1(address instr); -+ static Register extract_rs2(address instr); -+ static Register extract_rd(address instr); -+ static uint32_t extract_opcode(address instr); -+ static uint32_t extract_funct3(address instr); -+ -+ // the instruction sequence of movptr is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi/jalr/load -+ static bool check_movptr_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address last_instr = slli2 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(last_instr) == extract_rd(slli2); -+ } -+ -+ // the instruction sequence of li64 is as below: -+ // lui -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ // slli -+ // addi -+ static bool check_li64_data_dependency(address instr) { -+ address lui = instr; -+ address addi1 = lui + instruction_size; -+ address slli1 = addi1 + instruction_size; -+ address addi2 = slli1 + instruction_size; -+ address slli2 = addi2 + instruction_size; -+ address addi3 = slli2 + instruction_size; -+ address slli3 = addi3 + instruction_size; -+ address addi4 = slli3 + instruction_size; -+ return extract_rs1(addi1) == extract_rd(lui) && -+ extract_rs1(addi1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(addi1) && -+ extract_rs1(slli1) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(slli1) && -+ extract_rs1(addi2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(addi2) && -+ extract_rs1(slli2) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(slli2) && -+ extract_rs1(addi3) == extract_rd(addi3) && -+ extract_rs1(slli3) == 
extract_rd(addi3) && -+ extract_rs1(slli3) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(slli3) && -+ extract_rs1(addi4) == extract_rd(addi4); -+ } -+ -+ // the instruction sequence of li32 is as below: -+ // lui -+ // addiw -+ static bool check_li32_data_dependency(address instr) { -+ address lui = instr; -+ address addiw = lui + instruction_size; -+ -+ return extract_rs1(addiw) == extract_rd(lui) && -+ extract_rs1(addiw) == extract_rd(addiw); -+ } -+ -+ // the instruction sequence of pc-relative is as below: -+ // auipc -+ // jalr/addi/load/float_load -+ static bool check_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address last_instr = auipc + instruction_size; -+ -+ return extract_rs1(last_instr) == extract_rd(auipc); -+ } -+ -+ // the instruction sequence of load_label is as below: -+ // auipc -+ // load -+ static bool check_load_pc_relative_data_dependency(address instr) { -+ address auipc = instr; -+ address load = auipc + instruction_size; -+ -+ return extract_rd(load) == extract_rd(auipc) && -+ extract_rs1(load) == extract_rd(load); -+ } -+ -+ static bool is_movptr_at(address instr); -+ static bool is_li32_at(address instr); -+ static bool is_li64_at(address instr); -+ static bool is_pc_relative_at(address branch); -+ static bool is_load_pc_relative_at(address branch); -+ -+ static bool is_call_at(address instr) { -+ if (is_jal_at(instr) || is_jalr_at(instr)) { -+ return true; ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); + } -+ return false; -+ } -+ static bool is_lwu_to_zr(address instr); -+ -+ inline bool is_nop(); -+ inline bool is_jump_or_nop(); -+ bool is_safepoint_poll(); -+ bool is_sigill_zombie_not_entrant(); -+ bool is_stop(); -+ -+ protected: -+ address addr_at(int offset) const { return address(this) + offset; } -+ -+ jint int_at(int offset) const { return *(jint*) addr_at(offset); } -+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } -+ -+ address ptr_at(int offset) const { return *(address*) addr_at(offset); } -+ -+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } -+ -+ -+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } -+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } -+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } -+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } -+ -+ public: -+ -+ inline friend NativeInstruction* nativeInstruction_at(address addr); -+ -+ static bool maybe_cpool_ref(address instr) { -+ return is_auipc_at(instr); -+ } -+ -+ bool is_membar() { -+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; -+ } -+}; -+ -+inline NativeInstruction* nativeInstruction_at(address addr) { -+ return (NativeInstruction*)addr; -+} -+ -+// The natural type of an RISCV instruction is uint32_t -+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { -+ return (NativeInstruction*)addr; -+} -+ -+inline NativeCall* nativeCall_at(address addr); -+// The NativeCall is an abstraction for accessing/manipulating native -+// call instructions (used to manipulate inline caches, primitive & -+// DSO calls, etc.). 
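For readers tracing the data-dependency checks above, the following standalone sketch shows the idea in isolation (illustrative only, not the HotSpot API; the helper names are invented here, and the field positions come from the RISC-V base ISA, which keeps rd in bits [11:7] and rs1 in bits [19:15] of every 32-bit instruction):

    #include <cstdint>

    // rd and rs1 occupy fixed bit positions in all 32-bit RISC-V instructions.
    static uint32_t rd_of (uint32_t insn) { return (insn >> 7)  & 0x1f; }
    static uint32_t rs1_of(uint32_t insn) { return (insn >> 15) & 0x1f; }

    // An "auipc rd, hi20; addi rd, rd, lo12" pair only forms one logical
    // pc-relative constant load when the addi really consumes the auipc result.
    static bool pc_relative_pair_ok(uint32_t auipc, uint32_t addi) {
      return rs1_of(addi) == rd_of(auipc);
    }

    int main() {
      const uint32_t auipc_t0 = 0x00000297;  // auipc x5, 0
      const uint32_t addi_t0  = 0x00028293;  // addi  x5, x5, 0
      return pc_relative_pair_ok(auipc_t0, addi_t0) ? 0 : 1;
    }

The longer movptr/li64 checks above apply the same rule pairwise along the whole lui/addi/slli sequence.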
-+ -+class NativeCall: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = 4, -+ instruction_offset = 0, -+ displacement_offset = 0, -+ return_address_offset = 4 -+ }; -+ -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(return_address_offset); } -+ address return_address() const { return addr_at(return_address_offset); } -+ address destination() const; -+ -+ void set_destination(address dest) { -+ assert(is_jal(), "Should be jal instruction!"); -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "bad alignment"); -+ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); + } -+ -+ void verify_alignment() {} // do nothing on riscv -+ void verify(); -+ void print(); -+ -+ // Creation -+ inline friend NativeCall* nativeCall_at(address addr); -+ inline friend NativeCall* nativeCall_before(address return_address); -+ -+ static bool is_call_before(address return_address) { -+ return is_call_at(return_address - NativeCall::return_address_offset); ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? addr() : new_addr; ++ return new_addr; + } -+ -+ // MT-safe patching of a call instruction. -+ static void insert(address code_pos, address entry); -+ -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+ -+ // Similar to replace_mt_safe, but just changes the destination. The -+ // important thing is that free-running threads are able to execute -+ // this call instruction at all times. If the call is an immediate BL -+ // instruction we can simply rely on atomicity of 32-bit writes to -+ // make sure other threads will see no intermediate states. -+ -+ // We cannot rely on locks here, since the free-running threads must run at -+ // full speed. -+ // -+ // Used in the runtime linkage of calls; see class CompiledIC. -+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) -+ -+ // The parameter assert_lock disables the assertion during code generation. 
-+ void set_destination_mt_safe(address dest, bool assert_lock = true); -+ -+ address get_trampoline(); -+}; -+ -+inline NativeCall* nativeCall_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; -+} -+ -+inline NativeCall* nativeCall_before(address return_address) { -+ assert_cond(return_address != NULL); -+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif -+ return call; ++ return MacroAssembler::pd_call_destination(addr()); +} + -+// An interface for accessing/manipulating native mov reg, imm instructions. -+// (used to manipulate inlined 64-bit data calls, etc.) -+class NativeMovConstReg: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). -+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). -+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld -+ instruction_offset = 0, -+ displacement_offset = 0 -+ }; -+ -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ // if the instruction at 5 * instruction_size is addi, -+ // it means a lui + addi + slli + addi + slli + addi instruction sequence, -+ // and the next instruction address should be addr_at(6 * instruction_size). -+ // However, when the instruction at 5 * instruction_size isn't addi, -+ // the next instruction address should be addr_at(5 * instruction_size) -+ if (nativeInstruction_at(instruction_address())->is_movptr()) { -+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { -+ // Assume: lui, addi, slli, addi, slli, addi -+ return addr_at(movptr_instruction_size); -+ } else { -+ // Assume: lui, addi, slli, addi, slli -+ return addr_at(movptr_with_offset_instruction_size); -+ } -+ } else if (is_load_pc_relative_at(instruction_address())) { -+ // Assume: auipc, ld -+ return addr_at(load_pc_relative_instruction_size); -+ } -+ guarantee(false, "Unknown instruction in NativeMovConstReg"); -+ return NULL; -+ } -+ -+ intptr_t data() const; -+ void set_data(intptr_t x); -+ -+ void flush() { -+ if (!maybe_cpool_ref(instruction_address())) { -+ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; + } + } -+ -+ void verify(); -+ void print(); -+ -+ // Creation -+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); -+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); -+}; -+ -+inline NativeMovConstReg* nativeMovConstReg_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} -+ -+inline NativeMovConstReg* nativeMovConstReg_before(address addr) { -+ assert_cond(addr != NULL); -+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - 
NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif -+ return test; -+} -+ -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. -+class NativeMovRegMem: public NativeInstruction { -+ public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; -+ } -+ -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } -+ -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; -+ } -+ -+ int offset() const; -+ -+ void set_offset(int x); -+ -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } -+ -+ void verify(); -+ void print(); -+ -+ private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); -+}; -+ -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; -+} -+ -+class NativeJump: public NativeInstruction { -+ public: -+ enum RISCV_specific_constants { -+ instruction_size = NativeInstruction::instruction_size, -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = NativeInstruction::instruction_size -+ }; -+ -+ address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { return addr_at(instruction_size); } -+ address jump_destination() const; -+ void set_jump_destination(address dest); -+ -+ // Creation -+ inline friend NativeJump* nativeJump_at(address address); -+ -+ void verify(); -+ -+ // Insertion of native jump instruction -+ static void insert(address code_pos, address entry); -+ // MT-safe insertion of native jump at verified method entry -+ static void check_verified_entry_alignment(address entry, address verified_entry); -+ static void patch_verified_entry(address entry, address verified_entry, address dest); -+}; -+ -+inline NativeJump* nativeJump_at(address addr) { -+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif -+ return jump; -+} -+ -+class NativeGeneralJump: public NativeJump { -+public: -+ enum RISCV_specific_constants { -+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr -+ instruction_offset = 0, -+ data_offset = 0, -+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr -+ }; -+ -+ address jump_destination() const; -+ -+ static void insert_unconditional(address code_pos, address entry); -+ static void replace_mt_safe(address instr_addr, address code_buffer); -+}; -+ -+inline NativeGeneralJump* nativeGeneralJump_at(address addr) { -+ assert_cond(addr != NULL); -+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); -+ debug_only(jump->verify();) -+ return jump; ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); +} + -+class NativeIllegalInstruction: public NativeInstruction { -+ public: -+ // Insert illegal opcode as specific address -+ static void insert(address code_pos); -+}; -+ -+inline bool NativeInstruction::is_nop() { -+ uint32_t insn = *(uint32_t*)addr_at(0); -+ return insn == 0x13; ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); +} + -+inline bool NativeInstruction::is_jump_or_nop() { -+ return is_nop() || is_jump(); ++address 
Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); +} + -+// Call trampoline stubs. -+class NativeCallTrampolineStub : public NativeInstruction { -+ public: -+ -+ enum RISCV_specific_constants { -+ // Refer to function emit_trampoline_stub. -+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address -+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr -+ }; -+ -+ address destination(nmethod *nm = NULL) const; -+ void set_destination(address new_destination); -+ ptrdiff_t destination_offset() const; -+}; -+ -+inline bool is_NativeCallTrampolineStub_at(address addr) { -+ // Ensure that the stub is exactly -+ // ld t0, L--->auipc + ld -+ // jr t0 -+ // L: -+ -+ // judge inst + register + imm -+ // 1). check the instructions: auipc + ld + jalr -+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 -+ // 3). check if the offset in ld[31:20] equals the data_offset -+ assert_cond(addr != NULL); -+ const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && -+ NativeInstruction::is_ld_at(addr + instr_size) && -+ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && -+ (NativeInstruction::extract_rd(addr) == x5) && -+ (NativeInstruction::extract_rd(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && -+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && -+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { -+ return true; ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); + } -+ return false; -+} -+ -+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); -+ return (NativeCallTrampolineStub*)addr; +} + -+class NativeMembar : public NativeInstruction { -+public: -+ uint32_t get_kind(); -+ void set_kind(uint32_t order_kind); -+}; -+ -+inline NativeMembar *NativeMembar_at(address addr) { -+ assert_cond(addr != NULL); -+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); -+ return (NativeMembar*)addr; ++void metadata_Relocation::pd_fix_value(address x) { +} -+ -+class NativeFenceI : public NativeInstruction { -+public: -+ static inline int instruction_size() { -+ // 2 for fence.i + fence -+ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; -+ } -+}; -+ -+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 00000000000..26c1edc36ff +index 0000000000..840ed935d8 --- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -@@ -0,0 +1,45 @@ ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -29189,2062 +27703,1159 @@ index 00000000000..26c1edc36ff + * + */ + -+#include "precompiled.hpp" -+#include "runtime/registerMap.hpp" -+#include "vmreg_riscv.inline.hpp" ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP + -+address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -+ if (base_reg->is_VectorRegister()) { -+ assert(base_reg->is_concrete(), "must pass base reg"); -+ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register; -+ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -+ address base_location = location(base_reg); -+ if (base_location != NULL) { -+ return base_location + offset_in_bytes; -+ } else { -+ return NULL; -+ } -+ } else { -+ return location(base_reg->next(slot_idx)); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 ++ }; ++ ++ public: ++ ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_RISCV_RELOCINFO_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad new file mode 100644 -index 00000000000..f34349811a9 +index 0000000000..d54ae97200 --- /dev/null -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -0,0 +1,10273 @@ ++// ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// + -+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP -+#define CPU_RISCV_REGISTERMAP_RISCV_HPP ++// RISCV Architecture Description File + -+// machine-dependent implemention for register maps -+ friend class frame; ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. + -+ private: -+ // This is the hook for finding a register in an "well-known" location, -+ // such as a register block of a predetermined format. -+ address pd_location(VMReg reg) const { return NULL; } -+ address pd_location(VMReg base_reg, int slot_idx) const; ++register %{ ++//----------Architecture Description Register Definitions---------------------- ++// General Registers ++// "reg_def" name ( register save type, C convention save type, ++// ideal register type, encoding ); ++// Register Save Types: ++// ++// NS = No-Save: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, & ++// that they do not need to be saved at call sites. ++// ++// SOC = Save-On-Call: The register allocator assumes that these registers ++// can be used without saving upon entry to the method, ++// but that they must be saved at call sites. ++// ++// SOE = Save-On-Entry: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, but they do not need to be saved at call ++// sites. ++// ++// AS = Always-Save: The register allocator assumes that these registers ++// must be saved before using them upon entry to the ++// method, & that they must be saved at call sites. ++// ++// Ideal Register Type is used to determine how to save & restore a ++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get ++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. ++// ++// The encoding number is the actual bit-pattern placed into the opcodes. + -+ // no PD state to clear or copy: -+ void pd_clear() {} -+ void pd_initialize() {} -+ void pd_initialize_from(const RegisterMap* map) {} ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. 
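As a reading aid for the reg_def table that follows, here is a small standalone model of the information each line carries under the save-type scheme described above (illustrative only; the struct and names below are not HotSpot code):

    // One ADL "reg_def" entry: a 64-bit integer register is described as two
    // 32-bit slots, Rn and Rn_H, each with a register save type and a
    // C-convention save type plus the hardware encoding.
    enum SaveType { NS, SOC, SOE, AS };

    struct RegDefSketch {
      const char* name;   // slot name as written in the .ad file
      SaveType    rtype;  // save type seen by the register allocator
      SaveType    ctype;  // C calling-convention save type
      int         enc;    // encoding placed into opcodes (the x-register number)
    };

    // x10 (a0) is caller-saved, so both halves are SOC/SOC; x9 (s1) is
    // callee-saved under the C ABI but scratch for Java, hence SOC/SOE.
    static const RegDefSketch sketch[] = {
      { "R10",   SOC, SOC, 10 },
      { "R10_H", SOC, SOC, 10 },
      { "R9",    SOC, SOE,  9 },
      { "R9_H",  SOC, SOE,  9 },
    };

    int main() { return (sizeof(sketch) / sizeof(sketch[0]) == 4) ? 0 : 1; }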
++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) + -+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -new file mode 100644 -index 00000000000..f8116e9df8c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// ++// as regards Java usage. we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// + -+#include "precompiled.hpp" -+#include "register_riscv.hpp" ++// General Registers + -+REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -+REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, NS, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, NS, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 
12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); ++reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * -+ RegisterImpl::max_slots_per_register; ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- + -+const int ConcreteRegisterImpl::max_fpr = -+ ConcreteRegisterImpl::max_gpr + -+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++// Double Registers + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. 
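The pairing constraint just described reduces to a tiny predicate; the sketch below is illustrative only (ADLC enforces the rule itself at build time, and the ADLC numbers used here are hypothetical):

    #include <cassert>

    // Within one Fx/Fx_H pair the two ADLC-assigned numbers must be adjacent
    // and the lower one must be even.
    static bool valid_adlc_pair(int lo, int hi) {
      return (lo % 2 == 0) && (hi == lo + 1);
    }

    int main() {
      assert(valid_adlc_pair(64, 65));    // e.g. F0 / F0_H
      assert(!valid_adlc_pair(65, 66));   // lower number odd: not a legal pair
      return 0;
    }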
+ ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. + -+const char* RegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", -+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", -+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). Other ++// float registers are SOC as per the platform spec + -+const char* FloatRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", -+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", -+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", -+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, 
f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); ++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + -+const char* VectorRegisterImpl::name() const { -+ static const char *const names[number_of_registers] = { -+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", -+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", -+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", -+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" -+ }; -+ return is_valid() ? names[encoding()] : "noreg"; -+} -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -new file mode 100644 -index 00000000000..a9200cac647 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++// ---------------------------- ++// Special Registers ++// ---------------------------- + -+#ifndef CPU_RISCV_REGISTER_RISCV_HPP -+#define CPU_RISCV_REGISTER_RISCV_HPP ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto + -+#include "asm/register.hpp" ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); + -+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. -+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. -+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). -+#define CSR_VSTART 0x008 // Vector start position -+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag -+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode -+#define CSR_VCSR 0x00F // Vector control and status register -+#define CSR_VL 0xC20 // Vector length -+#define CSR_VTYPE 0xC21 // Vector data type register -+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) -+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. -+#define CSR_TIME 0xc01 // Timer for RDTIME instruction. -+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. + -+class VMRegImpl; -+typedef VMRegImpl* VMReg; ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, + -+// Use Register as shortcut -+class RegisterImpl; -+typedef const RegisterImpl* Register; ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, + -+inline constexpr Register as_Register(int encoding); ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, + -+class RegisterImpl: public AbstractRegisterImpl { -+ static constexpr Register first(); ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++alloc_class chunk1( + -+ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable -+ // for compressed instructions. See Table 17.2 in spec. 
-+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, + -+ // derived registers, offsets, and addresses -+ const Register successor() const { return this + 1; } ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, + -+ // construction -+ inline friend constexpr Register as_Register(int encoding); ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); + -+ VMReg as_VMReg() const; ++alloc_class chunk2(RFLAGS); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. ++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// + -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; -+ } ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; ++%} + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; -+ } -+}; ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); + -+REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); + -+// The integer registers of the RISCV architecture ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); + -+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); + -+CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); -+CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); -+CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); -+CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); -+CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); -+CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); -+CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); -+CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); -+CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); -+CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); 
-+CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); -+CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); -+CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); -+CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); -+CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); -+CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); -+CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); -+CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); -+CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); -+CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); -+CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); -+CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); -+CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); -+CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); -+CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); -+CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); -+CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); -+CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); -+CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); -+CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); -+CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); -+CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); + -+// Use FloatRegister as shortcut -+class FloatRegisterImpl; -+typedef const FloatRegisterImpl* FloatRegister; ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; ++%} + -+inline constexpr FloatRegister as_FloatRegister(int encoding); ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); + -+// The implementation of floating point registers for the architecture -+class FloatRegisterImpl: public AbstractRegisterImpl { -+ static constexpr FloatRegister first(); ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 2, ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; ++%} + -+ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
-+ compressed_register_base = 8, -+ compressed_register_top = 15, -+ }; ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; ++%} + -+ // construction -+ inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; ++%} + -+ VMReg as_VMReg() const; ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; ++%} + -+ // derived registers, offsets, and addresses -+ FloatRegister successor() const { -+ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -+ } ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); + -+ // for rvc -+ int compressed_encoding() const { -+ assert(is_compressed_valid(), "invalid compressed register"); -+ return encoding() - compressed_register_base; -+ } ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); + -+ int compressed_encoding_nocheck() const { -+ return encoding_nocheck() - compressed_register_base; -+ } ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); + -+ bool is_compressed_valid() const { -+ return encoding_nocheck() >= compressed_register_base && -+ encoding_nocheck() <= compressed_register_top; -+ } -+}; ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); + -+REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); + -+// The float registers of the RISCV architecture ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); + -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); 
-+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); -+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); + -+// Use VectorRegister as shortcut -+class VectorRegisterImpl; -+typedef const VectorRegisterImpl* VectorRegister; ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); + -+inline constexpr VectorRegister as_VectorRegister(int encoding); ++reg_class r28_reg( ++ R28, R28_H ++); + -+// The implementation of vector registers for RVV -+class VectorRegisterImpl: public AbstractRegisterImpl { -+ static constexpr VectorRegister first(); ++reg_class r29_reg( ++ R29, R29_H ++); + -+ public: -+ enum { -+ number_of_registers = 32, -+ max_slots_per_register = 4 -+ }; ++reg_class r30_reg( ++ R30, R30_H ++); + -+ // construction -+ inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++// Class for zero registesr ++reg_class zr_reg( ++ R0, R0_H ++); + -+ VMReg as_VMReg() const; ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); + -+ // derived registers, offsets, and addresses -+ VectorRegister successor() const { return this + 1; } ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); + -+ // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -+ int encoding_nocheck() const { return this - first(); } -+ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } -+ const char* name() const; ++// Class for link register ++reg_class ra_reg( ++ R1, R1_H ++); + -+}; ++// Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); + -+REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); + -+// The vector registers of RVV -+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. 
++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); + -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); -+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); + ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); + -+// Need to know the total number of registers of all sorts for SharedInfo. -+// Define a class that exports it. -+class ConcreteRegisterImpl : public AbstractRegisterImpl { -+ public: -+ enum { -+ // A big enough number for C2: all the registers plus flags -+ // This number must be large enough to cover REG_COUNT (defined by c2) registers. -+ // There is no requirement that any ordering here matches any ordering c2 gives -+ // it's optoregs. 
++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); + -+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ }; ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); + -+ // added to make it compile -+ static const int max_gpr; -+ static const int max_fpr; -+ static const int max_vpr; -+}; ++// class for condition codes ++reg_class reg_flags(RFLAGS); ++%} + -+typedef AbstractRegSet RegSet; -+typedef AbstractRegSet FloatRegSet; -+typedef AbstractRegSet VectorRegSet; ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// + -+#endif // CPU_RISCV_REGISTER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -new file mode 100644 -index 00000000000..228a64eae2c ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "code/relocInfo.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/oop.inline.hpp" -+#include "runtime/safepoint.hpp" ++definitions %{ ++ // The default cost (of a register move instruction). 
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 200, 2 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); ++%} + -+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { -+ if (verify_only) { -+ return; -+ } + -+ int bytes; + -+ switch (type()) { -+ case relocInfo::oop_type: { -+ oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate -+ if (NativeInstruction::is_load_pc_relative_at(addr())) { -+ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -+ assert(*(address*)constptr == x, "error in oop relocation"); -+ } else { -+ bytes = MacroAssembler::patch_oop(addr(), x); -+ } -+ break; -+ } -+ default: -+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); -+ break; -+ } -+ ICache::invalidate_range(addr(), bytes); -+} ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description + -+address Relocation::pd_call_destination(address orig_addr) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ return nativeCallTrampolineStub_at(trampoline)->destination(); -+ } -+ } -+ if (orig_addr != NULL) { -+ // the extracted address from the instructions in address orig_addr -+ address new_addr = MacroAssembler::pd_call_destination(orig_addr); -+ // If call is branch to self, don't try to relocate it, just leave it -+ // as branch to self. This happens during code generation if the code -+ // buffer expands. It will be relocated to the trampoline above once -+ // code generation is complete. -+ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; -+ return new_addr; -+ } -+ return MacroAssembler::pd_call_destination(addr()); -+} ++source_hpp %{ + -+void Relocation::pd_set_call_destination(address x) { -+ assert(is_call(), "should be an address instruction here"); -+ if (NativeCall::is_call_at(addr())) { -+ address trampoline = nativeCall_at(addr())->get_trampoline(); -+ if (trampoline != NULL) { -+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); -+ return; -+ } -+ } -+ MacroAssembler::pd_patch_instruction_size(addr(), x); -+ address pd_call = pd_call_destination(addr()); -+ assert(pd_call == x, "fail in reloc"); -+} ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" + -+address* Relocation::pd_address_in_code() { -+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); -+ return (address*)(MacroAssembler::target_addr_for_insn(addr())); -+} ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; + -+address Relocation::pd_get_address_from_code() { -+ return MacroAssembler::pd_call_destination(addr()); -+} ++class CallStubImpl { + -+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+ if (NativeInstruction::maybe_cpool_ref(addr())) { -+ address old_addr = old_addr_for(addr(), src, dest); -+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); -+ } -+} ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- + -+void metadata_Relocation::pd_fix_value(address x) { -+} -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -new file mode 100644 -index 00000000000..840ed935d88 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ public: ++ // Size of call trampoline stub. 
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } + -+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP -+#define CPU_RISCV_RELOCINFO_RISCV_HPP ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; + -+ // machine-dependent parts of class relocInfo -+ private: -+ enum { -+ // Relocations are byte-aligned. -+ offset_unit = 1, -+ // Must be at least 1 for RelocInfo::narrow_oop_in_const. -+ format_width = 1 -+ }; ++class HandlerImpl { + + public: + -+ // This platform has no oops in the code that are not also -+ // listed in the oop section. -+ static bool mustIterateImmediateOopsInCode() { return false; } ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); + -+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -new file mode 100644 -index 00000000000..588887e1d96 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10611 @@ -+// -+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); ++ } + -+// RISCV Architecture Description File ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; + -+//----------REGISTER DEFINITION BLOCK------------------------------------------ -+// This information is used by the matcher and the register allocator to -+// describe individual registers and classes of registers within the target -+// archtecture. ++bool is_CAS(int opcode, bool maybe_volatile); + -+register %{ -+//----------Architecture Description Register Definitions---------------------- -+// General Registers -+// "reg_def" name ( register save type, C convention save type, -+// ideal register type, encoding ); -+// Register Save Types: -+// -+// NS = No-Save: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, & -+// that they do not need to be saved at call sites. 
-+// -+// SOC = Save-On-Call: The register allocator assumes that these registers -+// can be used without saving upon entry to the method, -+// but that they must be saved at call sites. -+// -+// SOE = Save-On-Entry: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, but they do not need to be saved at call -+// sites. -+// -+// AS = Always-Save: The register allocator assumes that these registers -+// must be saved before using them upon entry to the -+// method, & that they must be saved at call sites. -+// -+// Ideal Register Type is used to determine how to save & restore a -+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -+// -+// The encoding number is the actual bit-pattern placed into the opcodes. ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_reserved(const Node *load); + -+// We must define the 64 bit int registers in two 32 bit halves, the -+// real lower register and a virtual upper half register. upper halves -+// are used by the register allocator but are not actually supplied as -+// operands to memory ops. -+// -+// follow the C1 compiler in making registers -+// -+// x7, x9-x17, x27-x31 volatile (caller save) -+// x0-x4, x8, x23 system (no save, no allocate) -+// x5-x6 non-allocatable (so we can use them as temporary regs) ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); + -+// -+// as regards Java usage. we don't use any callee save registers -+// because this makes it difficult to de-optimise a frame (see comment -+// in x86 implementation of Deoptimization::unwind_callee_save_values) -+// ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); ++%} + -+// General Registers ++source %{ + -+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr -+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); -+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp -+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); -+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp -+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); -+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp -+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); -+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); -+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); -+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp -+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); -+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); -+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); -+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); -+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); -+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); -+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); -+reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); -+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); -+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); -+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); -+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); -+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); 
-+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); -+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); -+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); -+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); -+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); -+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); -+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); -+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); -+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); -+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); -+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp -+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); -+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); -+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); -+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); -+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); -+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread -+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); -+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); -+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); -+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); -+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); -+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); -+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); -+reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase -+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); -+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); -+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); -+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); -+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); -+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); -+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); -+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); -+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++// Derived RegMask with conditionally allocatable registers + -+// ---------------------------- -+// Float/Double Registers -+// ---------------------------- ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+// Double Registers ++void reg_mask_init() { + -+// The rules of ADL require that double registers be defined in pairs. -+// Each pair must be two 32-bit values, but not necessarily a pair of -+// single float registers. In each pair, ADLC-assigned register numbers -+// must be adjacent, with the lower number even. Finally, when the -+// CPU stores such a register pair to memory, the word associated with -+// the lower ADLC-assigned number must be stored to the lower address. ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+// RISCV has 32 floating-point registers. Each can store a single -+// or double precision floating-point value. ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+// for Java use float registers f0-f31 are always save on call whereas -+// the platform ABI treats f8-f9 and f18-f27 as callee save). 
Other -+// float registers are SOC as per the platform spec ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); -+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); -+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); -+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); -+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); -+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); -+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); -+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); -+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); -+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); -+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); -+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); -+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); -+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); -+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); -+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); -+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); -+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); -+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); -+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); -+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); -+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); -+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); -+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); -+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); -+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); -+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); -+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); -+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); -+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); -+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); -+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); -+reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); -+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); -+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); -+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); -+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); -+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); -+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); -+reg_def F27_H ( 
SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); -+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); -+reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); -+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); -+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); -+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+// ---------------------------- -+// Vector Registers -+// ---------------------------- ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+ -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -+ -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -+ -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -+ -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -+ -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -+ -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -+ -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -+ -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -+ -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, 
v8->as_VMReg()->next(3) ); -+ -+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -+ -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -+ -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -+ -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -+ -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); -+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -+ -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -+ -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -+ -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -+ -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -+ -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -+ -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -+ -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -+ -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -+ -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); 
-+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); -+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -+ -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -+ -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -+ -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -+ -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -+ -+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -+ -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -+ -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -+ -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -+ -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+// ---------------------------- -+// Special Registers -+// ---------------------------- ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } + -+// On riscv, the physical flag register is missing, so we use t1 instead, -+// to bridge the RegFlag semantics in share/opto ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++} + -+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++// 
is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. ++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch (opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} + -+// Specify priority of register selection within phases of register -+// allocation. Highest priority is first. A useful heuristic is to -+// give registers a low priority when they are required by machine -+// instructions, like EAX and EDX on I486, and choose no-save registers -+// before save-on-call, & save-on-call before save-on-entry. Registers -+// which participate in fixed calling sequences should come last. -+// Registers which are used as pairs must fall on an even boundary. ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_reserved(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); + -+alloc_class chunk0( -+ // volatiles -+ R7, R7_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H, ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} + -+ // arg registers -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false + -+ // non-volatiles -+ R9, R9_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); + -+ // non-allocatable registers -+ R23, R23_H, // java thread -+ R27, R27_H, // heapbase -+ R4, R4_H, // thread -+ R8, R8_H, // fp -+ R0, R0_H, // zero -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+); ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking + -+alloc_class chunk1( ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } + -+ // no save -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H, -+ -+ // arg registers -+ F10, F10_H, -+ F11, 
F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, ++ // a storestore is unnecesary in all other cases + -+ // non-volatiles -+ F8, F8_H, -+ F9, F9_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+); ++ return true; ++} + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); ++#define __ _masm. + -+alloc_class chunk3(RFLAGS); ++// advance declarations for helper functions to convert register ++// indices to register objects + -+//----------Architecture Description Register Classes-------------------------- -+// Several register classes are automatically defined based upon information in -+// this architecture description. -+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// the ad file has to provide implementations of certain methods ++// expected by the generic code +// ++// REQUIRED FUNCTIONALITY + -+// Class for all 32 bit general purpose registers -+reg_class all_reg32( -+ R0, -+ R1, -+ R2, -+ R3, -+ R4, -+ R7, -+ R8, -+ R9, -+ R10, -+ R11, -+ R12, -+ R13, -+ R14, -+ R15, -+ R16, -+ R17, -+ R18, -+ R19, -+ R20, -+ R21, -+ R22, -+ R23, -+ R24, -+ R25, -+ R26, -+ R27, -+ R28, -+ R29, -+ R30, -+ R31 -+); -+ -+// Class for any 32 bit integer registers (excluding zr) -+reg_class any_reg32 %{ -+ return _ANY_REG32_mask; -+%} ++//============================================================================= + -+// Singleton class for R10 int register -+reg_class int_r10_reg(R10); ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. 
+ -+// Singleton class for R12 int register -+reg_class int_r12_reg(R12); ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // jal ++ return 1 * NativeInstruction::instruction_size; ++} + -+// Singleton class for R13 int register -+reg_class int_r13_reg(R13); ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 7 * NativeInstruction::instruction_size; // movptr, jal ++} + -+// Singleton class for R14 int register -+reg_class int_r14_reg(R14); ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be 11 instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr ++ CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} + -+// Class for all long integer registers -+reg_class all_reg( -+ R0, R0_H, -+ R1, R1_H, -+ R2, R2_H, -+ R3, R3_H, -+ R4, R4_H, -+ R7, R7_H, -+ R8, R8_H, -+ R9, R9_H, -+ R10, R10_H, -+ R11, R11_H, -+ R12, R12_H, -+ R13, R13_H, -+ R14, R14_H, -+ R15, R15_H, -+ R16, R16_H, -+ R17, R17_H, -+ R18, R18_H, -+ R19, R19_H, -+ R20, R20_H, -+ R21, R21_H, -+ R22, R22_H, -+ R23, R23_H, -+ R24, R24_H, -+ R25, R25_H, -+ R26, R26_H, -+ R27, R27_H, -+ R28, R28_H, -+ R29, R29_H, -+ R30, R30_H, -+ R31, R31_H -+); ++// ++// Compute padding required for nodes which need alignment ++// + -+// Class for all long integer registers (excluding zr) -+reg_class any_reg %{ -+ return _ANY_REG_mask; -+%} ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Class for non-allocatable 32 bit registers -+reg_class non_allocatable_reg32( -+ R0, // zr -+ R1, // ra -+ R2, // sp -+ R3, // gp -+ R4, // tp -+ R23 // java thread -+); ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. 
++ return align_up(current_offset, alignment_required()) - current_offset; ++} + -+// Class for non-allocatable 64 bit registers -+reg_class non_allocatable_reg( -+ R0, R0_H, // zr -+ R1, R1_H, // ra -+ R2, R2_H, // sp -+ R3, R3_H, // gp -+ R4, R4_H, // tp -+ R23, R23_H // java thread -+); ++// Indicate if the safepoint node needs the polling page as an input + -+reg_class no_special_reg32 %{ -+ return _NO_SPECIAL_REG32_mask; -+%} ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. + -+reg_class no_special_reg %{ -+ return _NO_SPECIAL_REG_mask; -+%} ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} + -+reg_class ptr_reg %{ -+ return _PTR_REG_mask; -+%} ++//============================================================================= + -+reg_class no_special_ptr_reg %{ -+ return _NO_SPECIAL_PTR_REG_mask; -+%} ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif + -+// Class for 64 bit register r10 -+reg_class r10_reg( -+ R10, R10_H -+); ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ebreak(); ++} + -+// Class for 64 bit register r11 -+reg_class r11_reg( -+ R11, R11_H -+); ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} + -+// Class for 64 bit register r12 -+reg_class r12_reg( -+ R12, R12_H -+); ++//============================================================================= + -+// Class for 64 bit register r13 -+reg_class r13_reg( -+ R13, R13_H -+); ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); ++ } ++#endif + -+// Class for 64 bit register r14 -+reg_class r14_reg( -+ R14, R14_H -+); ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } ++ } + -+// Class for 64 bit register r15 -+reg_class r15_reg( -+ R15, R15_H -+); ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); ++ } + -+// Class for 64 bit register r16 -+reg_class r16_reg( -+ R16, R16_H -+); ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+// Class for method register -+reg_class method_reg( -+ R31, R31_H -+); ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} + -+// Class for heapbase register -+reg_class heapbase_reg( -+ R27, R27_H -+); ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} + -+// Class for java thread register -+reg_class java_thread_reg( -+ R23, R23_H -+); ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} + -+reg_class r28_reg( -+ R28, R28_H -+); ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} + -+reg_class r29_reg( -+ R29, R29_H -+); ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif + -+reg_class r30_reg( -+ R30, R30_H -+); ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; + -+// Class for zero registesr -+reg_class zr_reg( -+ R0, R0_H -+); ++ int framesize = C->frame_slots() << LogBytesPerInt; + -+// Class for thread register -+reg_class thread_reg( -+ R4, R4_H -+); ++ if (C->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } + -+// Class for frame pointer register -+reg_class fp_reg( -+ R8, R8_H -+); ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } ++ st->print("sub sp, sp, #%d\n\t", framesize); ++} ++#endif + -+// Class for link register -+reg_class ra_reg( -+ R1, R1_H -+); ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); + -+// Class for long sp register -+reg_class sp_reg( -+ R2, R2_H -+); ++ // n.b. frame size includes space for return pc and fp ++ const int framesize = C->frame_size_in_bytes(); + -+// Class for all float registers -+reg_class float_reg( -+ F0, -+ F1, -+ F2, -+ F3, -+ F4, -+ F5, -+ F6, -+ F7, -+ F8, -+ F9, -+ F10, -+ F11, -+ F12, -+ F13, -+ F14, -+ F15, -+ F16, -+ F17, -+ F18, -+ F19, -+ F20, -+ F21, -+ F22, -+ F23, -+ F24, -+ F25, -+ F26, -+ F27, -+ F28, -+ F29, -+ F30, -+ F31 -+); ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ MacroAssembler::assert_alignment(__ pc()); ++ __ nop(); + -+// Double precision float registers have virtual `high halves' that -+// are needed by the allocator. 
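The MachPrologNode::format()/emit() pair shown above in this hunk describes a frame whose size is frame_slots << LogBytesPerInt bytes and whose prolog stores fp and ra just below the incoming sp before dropping sp by the frame size. A small sketch of that layout arithmetic, assuming wordSize == 8 and LogBytesPerInt == 2 as on a 64-bit target (the names here are illustrative, not HotSpot's), is:

#include <cstdio>

// Sketch: how a frame size counted in 32-bit slots maps to bytes, and
// where the prolog shown above places ra and fp relative to the
// incoming stack pointer. kWordSize/kLogBytesPerInt are assumptions.
constexpr int kWordSize = 8;
constexpr int kLogBytesPerInt = 2;

int main() {
  int frame_slots = 24;                              // example value
  int framesize = frame_slots << kLogBytesPerInt;    // bytes, as in format()

  std::printf("framesize = %d bytes\n", framesize);
  std::printf("sd fp, [sp, #%d]\n", -2 * kWordSize);  // save frame pointer
  std::printf("sd ra, [sp, #%d]\n", -kWordSize);      // save return address
  std::printf("sub sp, sp, #%d\n", framesize);        // allocate the frame
  return 0;
}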
-+// Class for all double registers -+reg_class double_reg( -+ F0, F0_H, -+ F1, F1_H, -+ F2, F2_H, -+ F3, F3_H, -+ F4, F4_H, -+ F5, F5_H, -+ F6, F6_H, -+ F7, F7_H, -+ F8, F8_H, -+ F9, F9_H, -+ F10, F10_H, -+ F11, F11_H, -+ F12, F12_H, -+ F13, F13_H, -+ F14, F14_H, -+ F15, F15_H, -+ F16, F16_H, -+ F17, F17_H, -+ F18, F18_H, -+ F19, F19_H, -+ F20, F20_H, -+ F21, F21_H, -+ F22, F22_H, -+ F23, F23_H, -+ F24, F24_H, -+ F25, F25_H, -+ F26, F26_H, -+ F27, F27_H, -+ F28, F28_H, -+ F29, F29_H, -+ F30, F30_H, -+ F31, F31_H -+); ++ assert_cond(C != NULL); + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); ++ int bangsize = C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } + -+// Class for 64 bit register f0 -+reg_class f0_reg( -+ F0, F0_H -+); ++ __ build_frame(framesize); + -+// Class for 64 bit register f1 -+reg_class f1_reg( -+ F1, F1_H -+); ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } + -+// Class for 64 bit register f2 -+reg_class f2_reg( -+ F2, F2_H -+); ++ C->set_frame_complete(cbuf.insts_size()); + -+// Class for 64 bit register f3 -+reg_class f3_reg( -+ F3, F3_H -+); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. 
++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} + -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} + -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); ++//============================================================================= + -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); + -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); ++ st->print("# pop frame %d\n\t", framesize); + -+// class for condition codes -+reg_class reg_flags(RFLAGS); -+%} -+ -+//----------DEFINITION BLOCK--------------------------------------------------- -+// Define name --> value mappings to inform the ADLC of an integer valued name -+// Current support includes integer values in the range [0, 0x7FFFFFFF] -+// Format: -+// int_def ( , ); -+// Generated Code in ad_.hpp -+// #define () -+// // value == -+// Generated code in ad_.cpp adlc_verification() -+// assert( == , "Expect () to equal "); -+// -+ -+// we follow the ppc-aix port in using a simple cost model which ranks -+// register operations as cheap, memory ops as more expensive and -+// branches as most expensive. the first two have a low as well as a -+// normal cost. huge cost appears to be a way of saying don't do -+// something -+ -+definitions %{ -+ // The default cost (of a register move instruction). 
-+ int_def DEFAULT_COST ( 100, 100); -+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, -+ // multi, auipc, nop, logical, move -+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload -+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore -+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call -+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul -+ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi -+ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi -+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd -+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv -+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt -+ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); -+%} -+ -+ -+ -+//----------SOURCE BLOCK------------------------------------------------------- -+// This is a block of C++ code which provides values, functions, and -+// definitions necessary in the rest of the architecture description -+ -+source_hpp %{ -+ -+#include "asm/macroAssembler.hpp" -+#include "gc/shared/cardTable.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "opto/addnode.hpp" -+#include "opto/convertnode.hpp" -+ -+extern RegMask _ANY_REG32_mask; -+extern RegMask _ANY_REG_mask; -+extern RegMask _PTR_REG_mask; -+extern RegMask _NO_SPECIAL_REG32_mask; -+extern RegMask _NO_SPECIAL_REG_mask; -+extern RegMask _NO_SPECIAL_PTR_REG_mask; -+ -+class CallStubImpl { -+ -+ //-------------------------------------------------------------- -+ //---< Used for optimization in Compile::shorten_branches >--- -+ //-------------------------------------------------------------- -+ -+ public: -+ // Size of call trampoline stub. 
-+ static uint size_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } -+ -+ // number of relocations needed by a call trampoline stub -+ static uint reloc_call_trampoline() { -+ return 0; // no call trampolines on this platform -+ } -+}; -+ -+class HandlerImpl { -+ -+ public: -+ -+ static int emit_exception_handler(CodeBuffer &cbuf); -+ static int emit_deopt_handler(CodeBuffer& cbuf); -+ -+ static uint size_exception_handler() { -+ return MacroAssembler::far_branch_size(); -+ } -+ -+ static uint size_deopt_handler() { -+ // count auipc + far branch -+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); -+ } -+}; -+ -+class Node::PD { -+public: -+ enum NodeFlags { -+ _last_flag = Node::_last_flag -+ }; -+}; -+ -+bool is_CAS(int opcode, bool maybe_volatile); -+ -+// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_reserved(const Node *load); -+ -+// predicate controlling addressing modes -+bool size_fits_all_mem_uses(AddPNode* addp, int shift); -+%} -+ -+source %{ -+ -+// Derived RegMask with conditionally allocatable registers -+ -+RegMask _ANY_REG32_mask; -+RegMask _ANY_REG_mask; -+RegMask _PTR_REG_mask; -+RegMask _NO_SPECIAL_REG32_mask; -+RegMask _NO_SPECIAL_REG_mask; -+RegMask _NO_SPECIAL_PTR_REG_mask; -+ -+void reg_mask_init() { -+ -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); -+ -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); -+ -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); -+ -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); -+ -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); -+ -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); -+ -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } -+ -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } -+} -+ -+void PhaseOutput::pd_perform_mach_node_analysis() { -+} -+ -+int MachNode::pd_alignment_required() const { -+ return 1; -+} -+ -+int MachNode::compute_padding(int current_offset) const { -+ return 0; -+} -+ -+// is_CAS(int opcode, bool maybe_volatile) -+// -+// return true if opcode is one of the possible CompareAndSwapX -+// values otherwise false. 
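The reg_mask_init() shown in this hunk builds the allocatable register masks by starting from "all registers" and subtracting the ones a given VM mode reserves, such as the heap-base register x27 under UseCompressedOops or the frame pointer x8 under PreserveFramePointer. A toy bitmask version of that pattern, with a stand-in RegMask type and flag variables that only mimic the real ones, might look like this:

#include <bitset>
#include <cstdio>

// Toy stand-in for the mask trimming in reg_mask_init(): begin with
// every register allocatable and subtract mode-reserved registers.
// The RegMask API and register indices here are illustrative only.
struct ToyRegMask {
  std::bitset<32> bits;
  void remove(int r)                     { bits.reset(r); }
  void subtract(const ToyRegMask& other) { bits &= ~other.bits; }
};

int main() {
  bool use_compressed_oops    = true;   // analogue of UseCompressedOops
  bool preserve_frame_pointer = false;  // analogue of PreserveFramePointer

  ToyRegMask no_special;
  no_special.bits.set();                // all registers allocatable at first

  ToyRegMask heapbase, fp;
  heapbase.bits.set(27);                // x27 serves as the heap base
  fp.bits.set(8);                       // x8 is the frame pointer

  if (use_compressed_oops)    no_special.subtract(heapbase);
  if (preserve_frame_pointer) no_special.subtract(fp);

  std::printf("allocatable count = %zu\n", no_special.bits.count());
  return 0;
}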
-+bool is_CAS(int opcode, bool maybe_volatile) -+{ -+ switch (opcode) { -+ // We handle these -+ case Op_CompareAndSwapI: -+ case Op_CompareAndSwapL: -+ case Op_CompareAndSwapP: -+ case Op_CompareAndSwapN: -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+ case Op_CompareAndSwapB: -+ case Op_CompareAndSwapS: -+ case Op_GetAndSetI: -+ case Op_GetAndSetL: -+ case Op_GetAndSetP: -+ case Op_GetAndSetN: -+ case Op_GetAndAddI: -+ case Op_GetAndAddL: -+ return true; -+ case Op_CompareAndExchangeI: -+ case Op_CompareAndExchangeN: -+ case Op_CompareAndExchangeB: -+ case Op_CompareAndExchangeS: -+ case Op_CompareAndExchangeL: -+ case Op_CompareAndExchangeP: -+ case Op_WeakCompareAndSwapB: -+ case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: -+ case Op_WeakCompareAndSwapL: -+ case Op_WeakCompareAndSwapP: -+ case Op_WeakCompareAndSwapN: -+ case Op_ShenandoahWeakCompareAndSwapP: -+ case Op_ShenandoahWeakCompareAndSwapN: -+ case Op_ShenandoahCompareAndExchangeP: -+ case Op_ShenandoahCompareAndExchangeN: -+ return maybe_volatile; -+ default: -+ return false; -+ } -+} -+ -+// predicate controlling translation of CAS -+// -+// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_reserved(const Node *n) -+{ -+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ -+ LoadStoreNode* ldst = n->as_LoadStore(); -+ if (n != NULL && is_CAS(n->Opcode(), false)) { -+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); -+ } else { -+ return ldst != NULL && ldst->trailing_membar() != NULL; -+ } -+ // so we can just return true here -+ return true; -+} -+#define __ _masm. -+ -+// advance declarations for helper functions to convert register -+// indices to register objects -+ -+// the ad file has to provide implementations of certain methods -+// expected by the generic code -+// -+// REQUIRED FUNCTIONALITY -+ -+//============================================================================= -+ -+// !!!!! Special hack to get all types of calls to specify the byte offset -+// from the start of the call to the point where the return address -+// will point. -+ -+int MachCallStaticJavaNode::ret_addr_offset() -+{ -+ // jal -+ return 1 * NativeInstruction::instruction_size; -+} -+ -+int MachCallDynamicJavaNode::ret_addr_offset() -+{ -+ return 7 * NativeInstruction::instruction_size; // movptr, jal -+} -+ -+int MachCallRuntimeNode::ret_addr_offset() { -+ // for generated stubs the call will be -+ // jal(addr) -+ // or with far branches -+ // jal(trampoline_stub) -+ // for real runtime callouts it will be 11 instructions -+ // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -> auipc + addi -+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi -+ // addi(sp, sp, -2 * wordSize) -> addi -+ // sd(t1, Address(sp, wordSize)) -> sd -+ // jalr(t0) -> jalr -+ CodeBlob *cb = CodeCache::find_blob(_entry_point); -+ if (cb != NULL) { -+ return 1 * NativeInstruction::instruction_size; -+ } else { -+ return 11 * NativeInstruction::instruction_size; -+ } -+} -+ -+int MachCallNativeNode::ret_addr_offset() { -+ Unimplemented(); -+ return -1; -+} -+ -+// -+// Compute padding required for nodes which need alignment -+// -+ -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. 
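The CallStaticJavaDirectNode/CallDynamicJavaDirectNode::compute_padding() implementations in this hunk return align_up(current_offset, alignment) - current_offset so that the jal of a call site stays 4-byte aligned and patchable. A standalone sketch of that helper, assuming a power-of-two alignment as in the real code, is:

#include <cassert>
#include <cstdio>

// Sketch of the padding computation used by compute_padding(): round
// the current offset up to the required alignment and report how many
// filler bytes that takes. Power-of-two alignment is assumed, matching
// the 4-byte call alignment discussed in the comments.
static int align_up_sketch(int offset, int alignment) {
  assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
  return (offset + alignment - 1) & ~(alignment - 1);
}

static int compute_padding_sketch(int current_offset, int alignment) {
  return align_up_sketch(current_offset, alignment) - current_offset;
}

int main() {
  // With RVC the code position may be only 2-byte aligned; a 4-byte
  // aligned jal then needs 0 or 2 bytes of nop padding.
  int offsets[] = {0, 2, 4, 6};
  for (int off : offsets) {
    std::printf("offset %d -> padding %d\n", off, compute_padding_sketch(off, 4));
  }
  return 0;
}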
-+int CallStaticJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // to make sure the address of jal 4-byte aligned. -+ return align_up(current_offset, alignment_required()) - current_offset; -+} -+ -+// With RVC a call instruction may get 2-byte aligned. -+// The address of the call instruction needs to be 4-byte aligned to -+// ensure that it does not span a cache line so that it can be patched. -+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const -+{ -+ // skip the movptr in MacroAssembler::ic_call(): -+ // lui + addi + slli + addi + slli + addi -+ // Though movptr() has already 4-byte aligned with or without RVC, -+ // We need to prevent from further changes by explicitly calculating the size. -+ const int movptr_size = 6 * NativeInstruction::instruction_size; -+ current_offset += movptr_size; -+ // to make sure the address of jal 4-byte aligned. -+ return align_up(current_offset, alignment_required()) - current_offset; -+} -+ -+//============================================================================= -+ -+#ifndef PRODUCT -+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL); -+ st->print("BREAKPOINT"); -+} -+#endif -+ -+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ __ ebreak(); -+} -+ -+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); -+} -+ -+//============================================================================= -+ -+#ifndef PRODUCT -+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { -+ st->print("nop \t# %d bytes pad for loops and calls", _count); -+ } -+#endif -+ -+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ C2_MacroAssembler _masm(&cbuf); -+ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. -+ for (int i = 0; i < _count; i++) { -+ __ nop(); -+ } -+ } -+ -+ uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); -+ } -+ -+//============================================================================= -+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; -+ -+int ConstantTable::calculate_table_base_offset() const { -+ return 0; // absolute addressing, no offset -+} -+ -+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -+void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { -+ ShouldNotReachHere(); -+} -+ -+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { -+ // Empty encoding -+} -+ -+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { -+ return 0; -+} -+ -+#ifndef PRODUCT -+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { -+ assert_cond(st != NULL); -+ st->print("-- \t// MachConstantBaseNode (empty encoding)"); -+} -+#endif -+ -+#ifndef PRODUCT -+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; -+ -+ int framesize = C->output()->frame_slots() << LogBytesPerInt; -+ -+ if (C->output()->need_stack_bang(framesize)) { -+ st->print("# stack bang size=%d\n\t", framesize); -+ } -+ -+ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]\n\t", - wordSize); -+ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } -+ st->print("sub sp, sp, #%d\n\t", framesize); -+ -+ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -+ st->print("ld t0, [guard]\n\t"); -+ st->print("membar LoadLoad\n\t"); -+ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -+ st->print("beq t0, t1, skip\n\t"); -+ st->print("jalr #nmethod_entry_barrier_stub\n\t"); -+ st->print("j skip\n\t"); -+ st->print("guard: int\n\t"); -+ st->print("skip:\n\t"); -+ } -+} -+#endif -+ -+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ assert_cond(ra_ != NULL); -+ Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); -+ -+ // n.b. frame size includes space for return pc and fp -+ const int framesize = C->output()->frame_size_in_bytes(); -+ -+ // insert a nop at the start of the prolog so we can patch in a -+ // branch if we need to invalidate the method later -+ __ nop(); -+ -+ assert_cond(C != NULL); -+ -+ if (C->clinit_barrier_on_entry()) { -+ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); -+ -+ Label L_skip_barrier; -+ -+ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ __ bind(L_skip_barrier); -+ } -+ -+ int bangsize = C->output()->bang_size_in_bytes(); -+ if (C->output()->need_stack_bang(bangsize)) { -+ __ generate_stack_overflow_check(bangsize); -+ } -+ -+ __ build_frame(framesize); -+ -+ if (C->stub_function() == NULL) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->nmethod_entry_barrier(&_masm); -+ } -+ -+ if (VerifyStackAtCalls) { -+ Unimplemented(); -+ } -+ -+ C->output()->set_frame_complete(cbuf.insts_size()); -+ -+ if (C->has_mach_constant_base_node()) { -+ // NOTE: We set the table base offset here because users might be -+ // emitted before MachConstantBaseNode. 
-+ ConstantTable& constant_table = C->output()->constant_table(); -+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); -+ } -+} -+ -+uint MachPrologNode::size(PhaseRegAlloc* ra_) const -+{ -+ assert_cond(ra_ != NULL); -+ return MachNode::size(ra_); // too many variables; just compute it -+ // the hard way -+} -+ -+int MachPrologNode::reloc() const -+{ -+ return 0; -+} -+ -+//============================================================================= -+ -+#ifndef PRODUCT -+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { -+ assert_cond(st != NULL && ra_ != NULL); -+ Compile* C = ra_->C; -+ assert_cond(C != NULL); -+ int framesize = C->output()->frame_size_in_bytes(); -+ -+ st->print("# pop frame %d\n\t", framesize); -+ -+ if (framesize == 0) { -+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); -+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); -+ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); -+ } else { -+ st->print("add sp, sp, #%d\n\t", framesize); -+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); -+ st->print("ld fp, [sp,#%d]\n\t", - wordSize); -+ } ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++ } + + if (do_polling() && C->is_method_compilation()) { -+ st->print("# test polling word\n\t"); -+ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -+ st->print("bgtu sp, t0, #slow_path"); ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } +} +#endif @@ -31252,9 +28863,9 @@ index 00000000000..588887e1d96 +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); -+ int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + __ remove_frame(framesize); + @@ -31263,13 +28874,7 @@ index 00000000000..588887e1d96 + } + + if (do_polling() && C->is_method_compilation()) { -+ Label dummy_label; -+ Label* code_stub = &dummy_label; -+ if (!C->output()->in_scratch_emit_size()) { -+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -+ } -+ __ relocate(relocInfo::poll_return_type); -+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); + } +} + @@ -31287,11 +28892,19 @@ index 00000000000..588887e1d96 + return MachNode::pipeline_class(); +} + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? ++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; ++} ++ +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. 
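The rc_class() helper defined just below in this hunk buckets an allocator register index by comparing it against the cumulative slot counts of the integer and float register files, with everything beyond them treated as a stack slot. A simplified standalone version of that bucketing, using assumed slot counts (30 allocatable GPRs and 32 FPRs, two 32-bit halves each) purely for illustration, is:

#include <cstdio>

// Simplified sketch of the rc_class() bucketing below. The slot counts
// are assumptions for illustration, not HotSpot's actual values.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

static RC rc_class_sketch(int reg) {
  const int slots_of_int_registers   = 30 * 2;
  const int slots_of_float_registers = 32 * 2;

  if (reg < 0) {
    return rc_bad;
  }
  if (reg < slots_of_int_registers) {
    return rc_int;
  }
  if (reg < slots_of_int_registers + slots_of_float_registers) {
    return rc_float;
  }
  return rc_stack;   // everything past the register files is a stack slot
}

int main() {
  std::printf("%d %d %d\n", rc_class_sketch(5), rc_class_sketch(70), rc_class_sketch(200));
  return 0;
}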
-+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + @@ -31312,13 +28925,7 @@ index 00000000000..588887e1d96 + return rc_float; + } + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } -+ -+ // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; @@ -31356,31 +28963,8 @@ index 00000000000..588887e1d96 + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + -+ if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ C2_MacroAssembler _masm(cbuf); -+ Assembler::CompressibleRegion cr(&_masm); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { -+ C2_MacroAssembler _masm(cbuf); ++ if (cbuf != NULL) { ++ MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: @@ -31463,17 +29047,7 @@ index 00000000000..588887e1d96 + } else { + st->print("%s", Matcher::regName[dst_lo]); + } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } ++ st->print("\t# spill size = %d", is64 ? 64 : 32); + } + + return 0; @@ -31510,7 +29084,7 @@ index 00000000000..588887e1d96 +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); @@ -31546,7 +29120,7 @@ index 00000000000..588887e1d96 + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (CompressedKlassPointers::shift() != 0) { ++ if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { @@ -31561,12 +29135,16 @@ index 00000000000..588887e1d96 +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // This is the unverified entry point. 
-+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ bind(skip); ++ ++ // These NOPs are critical so that verified entry point is properly ++ // 4 bytes aligned for patching by NativeJump::patch_verified_entry() ++ __ align(NativeInstruction::instruction_size); +} + +uint MachUEPNode::size(PhaseRegAlloc* ra_) const @@ -31588,7 +29166,7 @@ index 00000000000..588887e1d96 + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); @@ -31606,7 +29184,7 @@ index 00000000000..588887e1d96 +{ + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); @@ -31632,68 +29210,38 @@ index 00000000000..588887e1d96 + } + + switch (opcode) { -+ case Op_CacheWB: // fall through -+ case Op_CacheWBPreSync: // fall through -+ case Op_CacheWBPostSync: -+ if (!VM_Version::supports_data_cache_line_flush()) { -+ return false; -+ } -+ break; -+ -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_CountPositives: -+ return UseRVV; -+ -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; -+ + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + -+ case Op_RotateRight: -+ case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: -+ return UseRVB; ++ return UseZbb; + } + + return true; // Per default match rules are supported. +} + +// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen) and element type (bt). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } -+ -+ return op_vec_supported(opcode); -+} -+ -+const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + return false; +} + -+const RegMask* Matcher::predicate_reg_mask(void) { -+ return NULL; -+} -+ -+const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -+ return NULL; ++const bool Matcher::has_predicated_vectors(void) { ++ return false; +} + -+// Vector calling convention not yet implemented. -+const bool Matcher::supports_vector_calling_convention(void) { -+ return false; ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; +} + -+OptoRegPair Matcher::vector_return_value(uint ideal_reg) { ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ + Unimplemented(); -+ return OptoRegPair(0, 0); ++ return 0; +} + +// Is this branch offset short enough that a short branch can be used? 
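The short-branch predicate that follows accepts offsets in [-4096, 4096), which corresponds roughly to the +/-4 KiB reach of a RISC-V conditional-branch immediate. A quick standalone check of that range test:

#include <cstdio>

// Sketch of the short-branch range test below: an offset qualifies for
// a short (non-far) branch when it fits the roughly +/-4 KiB reach of a
// RISC-V conditional branch.
static bool offset_fits_short_branch(long offs) {
  return -4096 <= offs && offs < 4096;
}

int main() {
  long samples[] = {0, 4094, 4096, -4096, -4098};
  for (long o : samples) {
    std::printf("%6ld -> %s\n", o, offset_fits_short_branch(o) ? "short" : "far");
  }
  return 0;
}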
@@ -31719,17 +29267,23 @@ index 00000000000..588887e1d96 + return (-4096 <= offs && offs < 4096); +} + -+// Vector width in bytes. -+const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; -+ } -+ return 0; ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; +} + -+// Limits on vector size (number of elements) loaded into vector. ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ return 0; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + return vector_width_in_bytes(bt) / type2aelembytes(bt); +} @@ -31739,34 +29293,108 @@ index 00000000000..588887e1d96 + +// Vector ideal reg. +const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; -+ } -+ + ShouldNotReachHere(); + return 0; +} + -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; +} + -+MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return NULL; ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; +} + -+bool Matcher::is_reg2reg_move(MachNode* m) { -+ ShouldNotReachHere(); // generic vector operands not supported -+ return false; ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; +} + -+bool Matcher::is_generic_vector(MachOper* opnd) { -+ ShouldNotReachHere(); // generic vector operands not supported ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. 
++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true + return false; +} + ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} ++ ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++ ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = true; ++ ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} ++ ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } ++ ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; ++ +// Return whether or not this register is ever used as an argument. +// This function is used on startup to build the trampoline stubs in +// generateOptoStub. Registers not mentioned will be killed by the VM @@ -31798,33 +29426,6 @@ index 00000000000..588887e1d96 + return can_be_java_arg(reg); +} + -+uint Matcher::int_pressure_limit() -+{ -+ // A derived pointer is live at CallNode and then is flagged by RA -+ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -+ // derived pointers and lastly fail to spill after reaching maximum -+ // number of iterations. Lowering the default pressure threshold to -+ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -+ // a high register pressure area of the code so that split_DEF can -+ // generate DefinitionSpillCopy for the derived pointer. -+ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -+ if (!PreserveFramePointer) { -+ // When PreserveFramePointer is off, frame pointer is allocatable, -+ // but different from other SOC registers, it is excluded from -+ // fatproj's mask because its save type is No-Save. Decrease 1 to -+ // ensure high pressure at fatproj when PreserveFramePointer is off. -+ // See check_pressure_at_fatproj(). -+ default_int_pressure_threshold--; -+ } -+ return (INTPRESSURE == -1) ? 
default_int_pressure_threshold : INTPRESSURE; -+} -+ -+uint Matcher::float_pressure_limit() -+{ -+ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -+ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; -+} -+ +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} @@ -31871,23 +29472,18 @@ index 00000000000..588887e1d96 + return true; +} + -+// Should the Matcher clone input 'm' of node 'n'? -+bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -+ assert_cond(m != NULL); -+ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -+ mstack.push(m, Visit); // m = ShiftCntV -+ return true; -+ } -+ return false; -+} ++const bool Matcher::convi2l_type_required = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? -+bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + ++void Compile::reshape_address(AddPNode* addp) { ++} ++ +%} + + @@ -31922,15 +29518,15 @@ index 00000000000..588887e1d96 + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, con); ++ __ mv(dst_reg, con); + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { @@ -31943,25 +29539,36 @@ index 00000000000..588887e1d96 + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ li(dst_reg, $src$$constant); ++ __ mv(dst_reg, $src$$constant); + } + } + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); -+ __ li(dst_reg, 1); ++ __ mv(dst_reg, 1); ++ %} ++ ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { @@ -31974,13 +29581,13 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + 
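The riscv_enc_mov_poll_page encoding a few lines above asserts that the polling page is 4 KiB aligned and then materialises its address with la_patchable plus an addi of the returned offset. As a rough illustration only, the sketch below shows the page-alignment check and the generic RISC-V auipc-style hi20/lo12 split of an offset; it is not HotSpot's la_patchable() implementation, and the helper names are made up:

#include <cstdint>
#include <cstdio>

// Rough illustration: a 4 KiB-aligned page check (mirroring the
// `align == 0` assert above) and the usual RISC-V split of an offset
// into a hi20 part plus a sign-extended lo12 part such that
// (hi20 << 12) + lo12 == offset.
static bool page_aligned(std::uint64_t addr) {
  return (addr & 0xfff) == 0;
}

static void split_hi_lo(std::int32_t offset, std::int32_t* hi20, std::int32_t* lo12) {
  std::int32_t lo = offset & 0xfff;    // low 12 bits
  if (lo >= 0x800) {
    lo -= 0x1000;                      // sign-extend to a 12-bit immediate
  }
  *lo12 = lo;
  *hi20 = (offset - lo) >> 12;         // carry-adjusted upper 20 bits
}

int main() {
  std::printf("0x7fff0000 aligned: %d\n", page_aligned(0x7fff0000ULL));
  std::int32_t hi = 0, lo = 0;
  split_hi_lo(0x12345, &hi, &lo);
  std::printf("offset 0x12345 -> hi20=%d lo12=%d (recombined=%d)\n",
              hi, lo, (hi << 12) + lo);
  return 0;
}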
enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { @@ -31992,43 +29599,43 @@ index 00000000000..588887e1d96 + } + %} + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); @@ -32037,13 +29644,13 @@ index 00000000000..588887e1d96 + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + switch ($cmp$$cmpcode) { + case(BoolTest::ge): @@ 
-32067,7 +29674,7 @@ index 00000000000..588887e1d96 + + Label miss; + Label done; -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { @@ -32079,14 +29686,14 @@ index 00000000000..588887e1d96 + + __ bind(miss); + if (!$primary) { -+ __ li(cr_reg, 1); ++ __ mv(cr_reg, 1); + } + + __ bind(done); + %} + + enc_class riscv_enc_java_static_call(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; @@ -32118,7 +29725,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { @@ -32128,7 +29735,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_call_epilog() %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); @@ -32136,7 +29743,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they @@ -32164,8 +29771,8 @@ index 00000000000..588887e1d96 + %} + + // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); @@ -32179,87 +29786,80 @@ index 00000000000..588887e1d96 + // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+ if (DiagnoseSyncOnValueBasedClasses != 0) { -+ __ load_klass(flag, oop); -+ __ lwu(flag, Address(flag, Klass::access_flags_offset())); -+ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -+ __ bnez(flag, cont, true /* is_far */); ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); + } + + // Check for existing monitor -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+ if (!UseHeavyMonitors) { -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markWord::unlocked_value); ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // Initialize the box. (Must happen before we update the object mark!) 
++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -+ } ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas + -+ __ j(cont); ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markWord::monitor_value so use markWord::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -+ __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. + -+ __ beqz(flag, cont); // CAS success means locking succeeded ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. 
++ __ sub(disp_hdr, disp_hdr, sp); ++ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + -+ __ bne(flag, xthread, cont); // Check for recursive locking ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); + -+ // Recursive lock case -+ __ mv(flag, zr); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, tmp, 1u); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + + __ bind(cont); + %} + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); @@ -32270,59 +29870,61 @@ index 00000000000..588887e1d96 + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ if (!UseHeavyMonitors) { -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); + } + ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); ++ + // Handle existing monitor. 
-+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ -+ if (!UseHeavyMonitors) { -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ } else { -+ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, tmp, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); + } ++ ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. ++ ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. -+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ -+ Label notRecursive; -+ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -+ -+ // Recursive lock -+ __ addi(disp_hdr, disp_hdr, -1); -+ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ mv(flag, zr); -+ __ j(cont); ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + -+ __ bind(notRecursive); -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + + __ bind(cont); + %} @@ -32330,7 +29932,7 @@ index 00000000000..588887e1d96 + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -32338,7 +29940,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -32346,7 +29948,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -32354,7 +29956,7 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -32362,14 +29964,14 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 @@ -32380,12 +29982,12 @@ index 00000000000..588887e1d96 + %} + + enc_class riscv_enc_rethrow() %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ -+ C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} @@ -32451,12 +30053,18 @@ index 00000000000..588887e1d96 +// SP meets the minimum alignment. + +frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); ++ + // These three registers define part of the calling convention + // between compiled code and the interpreter. + + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + ++ // Method Oop Register when calling interpreter. ++ interpreter_method_oop_reg(R31); ++ + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + @@ -32476,6 +30084,12 @@ index 00000000000..588887e1d96 + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. 
The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); @@ -32494,6 +30108,25 @@ index 00000000000..588887e1d96 + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} ++ + // Location of compiled Java return values. Same as C for now. + return_value + %{ @@ -32750,13 +30383,23 @@ index 00000000000..588887e1d96 + interface(CONST_INTER); +%} + ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ +// Card Table Byte Map Base +operand immByteMapBase() +%{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (CardTable::CardValue*)n->get_ptr() == -+ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); @@ -32774,6 +30417,14 @@ index 00000000000..588887e1d96 + interface(CONST_INTER); +%} + ++operand immIpowerOf2() %{ ++ predicate(is_power_of_2((juint)(n->get_int()))); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ +// Long Immediate: low 32-bit mask +operand immL_32bits() +%{ @@ -33228,67 +30879,6 @@ index 00000000000..588887e1d96 + interface(REG_INTER); +%} + -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V2() -+%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ +// Java Thread Register +operand javaThread_RegP(iRegP reg) +%{ @@ -33346,7 +30936,7 @@ index 00000000000..588887e1d96 + +operand indirectN(iRegN reg) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); @@ -33361,7 +30951,7 @@ index 00000000000..588887e1d96 + +operand indOffIN(iRegN reg, immIOffset off) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); @@ -33376,7 +30966,7 @@ index 00000000000..588887e1d96 + +operand indOffLN(iRegN reg, immLOffset off) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); @@ -33569,13 +31159,13 @@ index 00000000000..588887e1d96 + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); -+ greater(0x1, "gt"); ++ greater(0x1, "gtu"); + overflow(0x2, "overflow"); -+ less(0x3, "lt"); ++ less(0x3, "ltu"); + not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); ++ less_equal(0x5, "leu"); + no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ greater_equal(0x7, "geu"); + %} +%} + @@ -33591,13 +31181,13 @@ index 00000000000..588887e1d96 + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0, "eq"); -+ greater(0x1, "gt"); ++ greater(0x1, "gtu"); + overflow(0x2, "overflow"); -+ less(0x3, "lt"); ++ less(0x3, "ltu"); + not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); ++ less_equal(0x5, "leu"); + no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ greater_equal(0x7, "geu"); + %} +%} + @@ -34081,7 +31671,7 @@ index 00000000000..588887e1d96 + LDST : MEM; +%} + -+//------- Store pipeline operations ----------------------- ++//------- Control transfer pipeline operations ------------ + +// Store - zr, mem +// E.g. 
SD zr, mem @@ -34444,7 +32034,6 @@ index 00000000000..588887e1d96 +instruct loadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); -+ predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} @@ -34599,6 +32188,19 @@ index 00000000000..588887e1d96 + ins_pipe(ialu_imm); +%} + ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++ ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ +// Load Byte Map Base Constant +instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ @@ -34715,6 +32317,7 @@ index 00000000000..588887e1d96 +instruct storeimmCM0(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" @@ -34914,6 +32517,8 @@ index 00000000000..588887e1d96 +instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ + match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); + + ins_cost(STORE_COST); + format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} @@ -35036,7 +32641,11 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_serial); +%} + -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. ++instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + @@ -35058,7 +32667,7 @@ index 00000000000..588887e1d96 + +// storeIConditional also has acquire semantics, for no better reason +// than matching storeLConditional. 
-+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) +%{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + @@ -35081,7 +32690,7 @@ index 00000000000..588887e1d96 +// standard CompareAndSwapX when we are using barriers +// these have higher priority than the rules selected by a predicate +instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + @@ -35104,7 +32713,7 @@ index 00000000000..588887e1d96 +%} + +instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + @@ -35126,7 +32735,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + @@ -35142,7 +32751,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + @@ -35160,8 +32769,6 @@ index 00000000000..588887e1d96 + +instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); -+ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); @@ -35176,7 +32783,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + @@ -35194,7 +32801,7 @@ index 00000000000..588887e1d96 + +// alternative CompareAndSwapX when we are eliding barriers +instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35219,7 +32826,7 @@ index 00000000000..588887e1d96 +%} + +instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35243,7 +32850,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35261,7 +32868,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp 
oldval, iRegLNoSp newval) ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35281,7 +32888,7 @@ index 00000000000..588887e1d96 + +instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + @@ -35297,7 +32904,7 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35322,7 +32929,7 @@ index 00000000000..588887e1d96 +// can't check the type of memory ordering here, so we always emit a +// sc_d(w) with rl bit set. +instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + @@ -35344,7 +32951,7 @@ index 00000000000..588887e1d96 +%} + +instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + @@ -35427,7 +33034,6 @@ index 00000000000..588887e1d96 + +instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); @@ -35447,7 +33053,7 @@ index 00000000000..588887e1d96 +%} + +instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35471,7 +33077,7 @@ index 00000000000..588887e1d96 +%} + +instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35562,7 +33168,7 @@ index 00000000000..588887e1d96 + +instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + @@ -35583,7 +33189,7 @@ index 00000000000..588887e1d96 +%} + +instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + @@ -35593,21 +33199,20 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, 
#@weakCompareAndSwapB" ++ "# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + @@ -35617,14 +33222,13 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" ++ "# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35638,13 +33242,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" ++ "# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35658,13 +33261,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" ++ "# $res == 1 when success, #@weakCompareAndSwapL" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35678,13 +33280,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" ++ "# $res == 1 when success, #@weakCompareAndSwapN" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35692,27 +33293,25 @@ index 00000000000..588887e1d96 + +instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, 
$res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" ++ "# $res == 1 when success, #@weakCompareAndSwapP" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35724,21 +33323,20 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ "# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); +%} + +instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ + predicate(needs_acquiring_load_reserved(n)); + @@ -35750,14 +33348,13 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ "# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35773,13 +33370,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ "# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35795,13 +33391,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ "# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35817,13 +33412,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem 
<-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ "# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35831,7 +33425,7 @@ index 00000000000..588887e1d96 + +instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + @@ -35839,13 +33433,12 @@ index 00000000000..588887e1d96 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ "\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -35898,7 +33491,6 @@ index 00000000000..588887e1d96 + +instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); @@ -35965,7 +33557,7 @@ index 00000000000..588887e1d96 + +instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetP mem newv)); + @@ -37285,7 +34877,7 @@ index 00000000000..588887e1d96 +%} + +instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -37643,7 +35235,6 @@ index 00000000000..588887e1d96 + +instruct membar_storestore() %{ + match(MemBarStoreStore); -+ match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} @@ -37728,17 +35319,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castLL(iRegL dst) -+%{ -+ match(Set dst (CastLL dst)); -+ -+ size(0); -+ format %{ "# castLL of $dst, #@castLL" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +instruct castII(iRegI dst) +%{ + match(Set dst (CastII dst)); @@ -37761,39 +35341,6 @@ index 00000000000..588887e1d96 + ins_pipe(pipe_class_empty); +%} + -+instruct castFF(fRegF dst) -+%{ -+ match(Set dst (CastFF dst)); -+ -+ size(0); -+ format %{ "# castFF of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castDD(fRegD dst) -+%{ -+ match(Set dst (CastDD dst)); -+ -+ size(0); -+ format %{ "# castDD of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ -+instruct castVV(vReg dst) -+%{ -+ match(Set dst (CastVV dst)); -+ -+ size(0); -+ format %{ "# castVV of $dst" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); -+%} -+ +// 
============================================================================ +// Convert Instructions + @@ -38029,7 +35576,7 @@ index 00000000000..588887e1d96 +// in case of 32bit oops (heap < 4Gb). +instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); @@ -38588,7 +36135,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38608,7 +36155,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38667,7 +36214,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38686,7 +36233,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38707,7 +36254,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38727,7 +36274,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38748,7 +36295,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38768,7 +36315,7 @@ index 00000000000..588887e1d96 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -38785,7 +36332,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ 
format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38802,7 +36349,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -38820,10 +36367,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -38838,10 +36385,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -39113,7 +36660,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); @@ -39162,7 +36709,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39177,7 +36724,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39220,7 +36767,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39235,7 +36782,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39253,7 +36800,7 @@ index 
00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39271,7 +36818,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39289,7 +36836,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39307,7 +36854,7 @@ index 00000000000..588887e1d96 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39322,7 +36869,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39338,7 +36885,7 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -39355,10 +36902,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39371,10 +36918,10 @@ index 00000000000..588887e1d96 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -39673,10 +37220,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, 
$src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39684,7 +37229,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ @@ -39692,18 +37237,16 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39711,10 +37254,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39722,7 +37263,24 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ @@ -39730,10 +37288,8 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -39741,7 +37297,7 @@ index 00000000000..588887e1d96 + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + +instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ @@ -39749,38 +37305,51 @@ index 00000000000..588887e1d96 + ins_cost(ALU_COST + BRANCH_COST); + + format %{ -+ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++instruct cmovL_cmpI(iRegLNoSp 
dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); ++ + format %{ -+ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); +%} + ++instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} + +// ============================================================================ +// Procedure Call/Return Instructions @@ -39920,7 +37489,7 @@ index 00000000000..588887e1d96 +instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39938,7 +37507,7 @@ index 00000000000..588887e1d96 +instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39955,7 +37524,7 @@ index 00000000000..588887e1d96 +instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -39973,7 +37542,7 @@ index 00000000000..588887e1d96 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + @@ -40119,7 +37688,6 @@ index 00000000000..588887e1d96 + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set 
result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + @@ -40133,28 +37701,9 @@ index 00000000000..588887e1d96 +%} + + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -+ -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ +// clearing of an array +instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) +%{ -+ predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + @@ -40174,8 +37723,7 @@ index 00000000000..588887e1d96 + +instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) +%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + @@ -40192,7 +37740,7 @@ index 00000000000..588887e1d96 +instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40208,7 +37756,7 @@ index 00000000000..588887e1d96 +instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + @@ -40225,7 +37773,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40242,7 +37790,7 @@ index 00000000000..588887e1d96 + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + @@ -40455,10 +38003,10 
@@ index 00000000000..588887e1d96 +// End: diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 -index 00000000000..4488c1c4031 +index 0000000000..7dda004cd3 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,527 @@ +@@ -0,0 +1,466 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -40486,88 +38034,12 @@ index 00000000000..4488c1c4031 + +// RISCV Bit-Manipulation Extension Architecture Description File + -+instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -+ %} -+ -+ ins_pipe(ialu_reg_shift); -+%} -+ -+instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateRight src shift)); -+ -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ predicate(UseRVB); -+ match(Set dst (RotateLeft src shift)); -+ -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} -+ +// Convert oop into int for vectors alignment masking -+instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ -+ predicate(UseRVB); ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); + match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40578,11 +38050,11 @@ index 00000000000..4488c1c4031 +%} + +// byte to int -+instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseRVB); ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ 
"sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40593,11 +38065,11 @@ index 00000000000..4488c1c4031 +%} + +// int to short -+instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseRVB); ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40608,11 +38080,11 @@ index 00000000000..4488c1c4031 +%} + +// short to unsigned int -+instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseRVB); ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); + match(Set dst (AndI src mask)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40623,11 +38095,11 @@ index 00000000000..4488c1c4031 +%} + +// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseRVB); ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); + match(Set dst (AndL (ConvI2L src) mask)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -40638,12 +38110,12 @@ index 00000000000..4488c1c4031 +%} + +// BSWAP instructions -+instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40652,12 +38124,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40666,12 +38138,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40680,12 +38152,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct 
bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40695,12 +38167,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Pointer -+instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40713,12 +38185,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40732,12 +38204,12 @@ index 00000000000..4488c1c4031 +%} + +// Shift Add Long -+instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40750,12 +38222,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseRVB); ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -40769,12 +38241,12 @@ index 00000000000..4488c1c4031 +%} + +// Zeros Count instructions -+instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40783,12 +38255,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountLeadingZerosL src)); + + 
ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40797,12 +38269,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40811,12 +38283,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40826,12 +38298,12 @@ index 00000000000..4488c1c4031 +%} + +// Population Count instructions -+instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40841,12 +38313,12 @@ index 00000000000..4488c1c4031 +%} + +// Note: Long/bitCount(long) returns an int. 
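++// (Illustration only: Long.bitCount(-1L) is the int 64, so a single cpop
++// into the 32-bit result register class below is sufficient and no
++// narrowing move is needed afterwards.)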
-+instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); @@ -40856,12 +38328,12 @@ index 00000000000..4488c1c4031 +%} + +// Max and Min -+instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40870,12 +38342,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseRVB); ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -40885,14 +38357,14 @@ index 00000000000..4488c1c4031 +%} + +// Abs -+instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ -+ predicate(UseRVB); ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_rvb" ++ "max $dst, $src, t0\t#@absI_reg_b" + %} + + ins_encode %{ @@ -40903,14 +38375,14 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseRVB); ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_rvb" ++ "max $dst, $src, t0\t#@absL_reg_b" + %} + + ins_encode %{ @@ -40922,12 +38394,12 @@ index 00000000000..4488c1c4031 +%} + +// And Not -+instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40938,12 +38410,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -40955,12 +38427,12 @@ index 00000000000..4488c1c4031 +%} + +// Or Not -+instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseRVB); ++instruct 
ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40971,12 +38443,12 @@ index 00000000000..4488c1c4031 + ins_pipe(ialu_reg_reg); +%} + -+instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseRVB); ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -40985,8722 +38457,8508 @@ index 00000000000..4488c1c4031 + %} + + ins_pipe(ialu_reg_reg); ++ ++%} ++ ++// AndI 0b0..010..0 + ConvI2B ++instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{ ++ predicate(UseZbs); ++ match(Set dst (Conv2B (AndI src mask))); ++ ins_cost(ALU_COST); ++ ++ format %{ "bexti $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %} ++ ins_encode %{ ++ __ bexti($dst$$Register, $src$$Register, exact_log2((juint)($mask$$constant))); ++ %} ++ ++ ins_pipe(ialu_reg_reg); +%} \ No newline at end of file -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp new file mode 100644 -index 00000000000..3828e096b21 +index 0000000000..7b1112b388 --- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,2065 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2661 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// RISCV Vector Extension Architecture Description File ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif + -+opclass vmemA(indirect); ++#define __ masm-> + -+source_hpp %{ -+ bool op_vec_supported(int opcode); -+%} ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + -+source %{ ++class SimpleRuntimeFrame { ++public: + -+ static void loadStore(C2_MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } -+ -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ // Vector API specific -+ case Op_AndReductionV: -+ case Op_OrReductionV: -+ case Op_XorReductionV: -+ case Op_LoadVectorGather: -+ case Op_StoreVectorScatter: -+ case Op_VectorBlend: -+ case Op_VectorCast: -+ case Op_VectorCastB2X: -+ case Op_VectorCastD2X: -+ case Op_VectorCastF2X: -+ case Op_VectorCastI2X: -+ case Op_VectorCastL2X: -+ case Op_VectorCastS2X: -+ case Op_VectorInsert: -+ case Op_VectorLoadConst: -+ case Op_VectorLoadMask: -+ case Op_VectorLoadShuffle: -+ case 
Op_VectorMaskCmp: -+ case Op_VectorRearrange: -+ case Op_VectorReinterpret: -+ case Op_VectorStoreMask: -+ case Op_VectorTest: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; + -+%} ++class RegisterSaver { ++ public: ++ RegisterSaver() {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); + -+definitions %{ -+ int_def VEC_COST (200, 200); -+%} ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---f0---|<---SP ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. --| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int f0_offset_in_bytes(void) { ++ return 0; ++ } ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; ++ } + -+// All VEC instructions ++ int reg_offset_in_bytes(Register r) { ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } + -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} -+ ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -+ Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} -+ ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -+ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++}; + -+// vector abs ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int 
additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; + -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Save Integer and Float registers. ++ __ enter(); ++ __ push_CPU_state(); + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ } + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -+ ins_encode %{ -+ __ 
vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } + -+// vector add ++ return oop_map; ++} + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ __ pop_CPU_state(); ++ __ leave(); ++} + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Is vector's size (in bytes) bigger than a size saved by default? ++bool SharedRuntime::is_wide_vector(int size) { ++ return false; ++} + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. 
++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} + -+// vector and ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. + -+// vector or ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ // Create the mapping between argument positions and ++ // registers. 
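++  // Rough illustration (not generated code): for an argument list of
++  // { long, Object, float }, i.e. sig_bt = { T_LONG, T_VOID, T_OBJECT, T_FLOAT },
++  // the loop below yields regs = { j_rarg0, bad, j_rarg1, j_farg0 } with
++  // stk_args == 0, so the call passes everything in registers and the
++  // returned out-arg area size is 0.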
++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; + -+// vector xor ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+// vector float div ++ return align_up(stk_args, 2); ++} + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Patch the callers callsite with entry to compiled code if it exists. 
++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ enter(); ++ __ push_CPU_state(); + -+// vector integer max/min ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach + -+instruct vmax(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmax_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif + -+instruct vmin(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -+ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -+ ins_encode %{ -+ BasicType bt = Matcher::vector_element_basic_type(this); -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ __ vsetvli(t0, x0, sew); -+ __ vmin_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); + -+// vector float-point max/min ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} + -+instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. 
++ patch_callers_callsite(masm); + -+instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ bind(skip_fixup); + -+instruct vminF(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int words_pushed = 0; + -+instruct vminD(vReg dst, vReg src1, vReg src2) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinV src1 src2)); -+ effect(TEMP_DEF dst); -+ ins_cost(VEC_COST); -+ format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -+ ins_encode %{ -+ __ minmax_FD_v(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. + -+// vector fmla ++ int extraspace = total_args_passed * Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ mv(x30, sp); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); + -+// vector fmls ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < 
total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; + -+// vector fnmla ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 
0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } + -+// vector fnmls ++ __ mv(esp, sp); // Interp expects args on caller's expression stack + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. ++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } + -+// vector mla ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. 
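++  // (Worked example for the stack extension above, illustration only: with
++  // comp_args_on_stack == 5 compiler slots, comp_words_on_stack is
++  // align_up(5 * 4, 8) >> 3 == 3, so sp is dropped by 24 bytes and then
++  // re-aligned down to a 16-byte boundary by the andi with -16.)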
++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now generate the shuffle code. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Pick up 0, 1 or 2 words from SP+offset. + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address + -+// vector mls ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. 
reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } ++ } else { ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } ++ } ++ } + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. 
+ -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); + -+// vector mul ++ __ jr(t1); ++} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ Label ok; + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. 
++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+// vector fneg ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } + -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ address c2i_entry = __ pc(); + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + -+// popcount vector ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); + -+// vector add reduction ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
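++  // (Illustration only: unlike the Java convention above, a float argument
++  // that does not fit in c_farg0..c_farg7 is passed in a still-free integer
++  // register before spilling to the stack, so e.g. the ninth float of an
++  // all-float native signature lands in c_rarg0 here.)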
+ -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < 
Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ return stk_args; ++} + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// The C ABI specifies: ++// "integer scalars narrower than XLEN bits are widened according to the sign ++// of their type up to 32 bits, then sign-extended to XLEN bits." ++// Applies for both passed in register and stack. ++// ++// Java uses 32-bit stack slots; jint, jshort, jchar, jbyte uses one slot. ++// Native uses 64-bit stack slots for all integer scalar types. ++// ++// lw loads the Java stack slot, sign-extends and ++// sd store this widened integer into a 64 bit native stack slot. 
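A tiny self-contained C++ illustration of the widening rule described in the comment above and implemented by move32_64 below; plain int32_t/int64_t stand in for the Java and native stack slots.

    #include <cassert>
    #include <cstdint>

    int main() {
        // A Java int in a 32-bit stack slot...
        int32_t java_slot = -5;
        // ...is widened to a 64-bit native slot with sign extension,
        // which is what the lw (or addw) in move32_64 produces on RV64.
        int64_t native_slot = java_slot;
        assert(native_slot == -5);
        assert(static_cast<uint64_t>(native_slot) == UINT64_C(0xFFFFFFFFFFFFFFFB));
        return 0;
    }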
++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} + -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); + -+// vector integer max reduction -+instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // See if oop is NULL if it is we need no handle + -+instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (src.first()->is_stack()) { + -+instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); 
-+ effect(TEMP tmp); -+ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } + -+instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+// vector integer min reduction -+instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { + -+instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ Label Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL + -+instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minI $dst, 
$src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } + -+instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; + -+// vector float max reduction ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } + -+instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } ++ } + -+instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MaxReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, false /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
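The handle rule that object_move applies, in a minimal standalone C++ form (the oop typedef and make_handle are illustrative stand-ins, not HotSpot code): the native callee receives the address of the slot holding the oop, or NULL when the oop itself is NULL.

    #include <cstdio>

    typedef void* oop;   // stand-in for a Java reference

    // A handle is the address of the slot holding the oop, or NULL for a
    // NULL oop, so the callee dereferences it lazily and the GC can still
    // find (and update) the oop through the oop map.
    oop* make_handle(oop* slot) {
        return (*slot == nullptr) ? nullptr : slot;
    }

    int main() {
        oop real_oop = reinterpret_cast<oop>(0x1234);   // fake value, illustration only
        oop null_oop = nullptr;
        printf("%p %p\n", static_cast<void*>(make_handle(&real_oop)),
                          static_cast<void*>(make_handle(&null_oop)));
        return 0;
    }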
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} + -+// vector float min reduction ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ false /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} + -+instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (MinReductionV src1 src2)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -+ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -+ ins_encode %{ -+ __ reduce_minmax_FD_v($dst$$FloatRegister, -+ $src1$$FloatRegister, as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -+ true /* is_double */, true /* is_min */); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ 
ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} + -+// vector Math.rint, floor, ceil ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} + -+instruct vroundD(vReg dst, vReg src, immI rmode) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -+ match(Set dst (RoundDoubleModeV src rmode)); -+ format %{ "vroundD $dst, $src, $rmode" %} -+ ins_encode %{ -+ switch ($rmode$$constant) { -+ case RoundDoubleModeNode::rmode_rint: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ break; -+ case RoundDoubleModeNode::rmode_floor: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ break; -+ case RoundDoubleModeNode::rmode_ceil: -+ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -+ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ break; -+ default: -+ ShouldNotReachHere(); -+ break; ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); + } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ } ++} + -+// vector replicate ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} + -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} + -+instruct 
replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void rt_call(MacroAssembler* masm, address dest) { ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} + -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); + -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } + -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } + -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = x12; // known to be free at this point ++ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } + -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} + -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++// ++// Critical native functions are a shorthand for the use of ++// GetPrimtiveArrayCritical and disallow the use of any other JNI ++// functions. The wrapper is expected to unpack the arguments before ++// passing them to the callee and perform checks before and after the ++// native call to ensure that they GCLocker ++// lock_critical/unlock_critical semantics are followed. Some other ++// parts of JNI setup are skipped like the tear down of the JNI handle ++// block and the check for pending exceptions it's impossible for them ++// to be thrown. 
++// ++// They are roughly structured like this: ++// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() ++// tranistion to thread_in_native ++// unpack arrray arguments and call native entry point ++// check for safepoint in progress ++// check if any thread suspend flags are set ++// call into JVM and possible unlock the JNI critical ++// if a GC was suppressed while in the critical native. ++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ MacroAssembler::assert_alignment(__ pc()); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ address native_func = method->native_function(); ++ assert(native_func != NULL, "must have function"); + -+// vector shift ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); + -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) + -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); + -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; + -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int argc = 0; ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } + -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, 
as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Calculate the total number of stack slots we will need. + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + -+instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers + ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; + -+instruct vlsrI(vReg dst, vReg 
src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now any space we need for handlizing a klass if static method + ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} -+ -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } + -+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Plus a lock if needed + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } + -+instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (RShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ 
vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. 
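A small self-contained sketch of the rounding performed on the next line; align_up_slots is an illustrative stand-in for HotSpot's align_up, and StackAlignmentInSlots == 4 is assumed here (16-byte stack alignment, 4-byte VMReg slots).

    #include <cassert>

    // Round a slot count up to a power-of-two alignment.
    constexpr int align_up_slots(int slots, int alignment) {
        return (slots + alignment - 1) & ~(alignment - 1);
    }

    int main() {
        assert(align_up_slots(54, 4) == 56);   // 54 slots of frame contents -> 56
        assert(align_up_slots(56, 4) == 56);   // already-aligned counts are unchanged
        return 0;
    }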
++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (URShiftVL src (RShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; + -+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // First thing make an ic check to see if we should even be here + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
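Conceptually, the inline-cache check emitted next behaves like the following standalone sketch; FakeOop, dispatch and the string results are illustrative only, and the real code compares klass words loaded via cmp_klass.

    #include <cstdio>

    struct Klass {};                      // stand-in; the real check may use compressed class pointers
    struct FakeOop { Klass* klass; };

    // The call site leaves the klass it expects in ic_reg (t1); a receiver
    // of any other klass is routed to the ic-miss stub for re-resolution.
    const char* dispatch(const FakeOop* receiver, const Klass* expected) {
        return (receiver->klass == expected) ? "verified entry" : "ic miss stub";
    }

    int main() {
        Klass a, b;
        FakeOop obj{&a};
        printf("%s / %s\n", dispatch(&obj, &a), dispatch(&obj, &b));  // verified entry / ic miss stub
        return 0;
    }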
+ -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -+ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -+ match(Set dst (LShiftVL src (LShiftCntV shift))); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ Label hit; ++ Label exception_pending; + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Verified entry point must be aligned ++ __ align(8); + -+// vector sqrt ++ __ bind(hit); + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ int vep_offset = ((intptr_t)__ pc()) - start; + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, 
Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. ++ MacroAssembler::assert_alignment(__ pc()); ++ __ nop(); + -+// vector sub ++ // Generate stack overflow check ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Generate a new frame for the wrapper. ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ sub(sp, sp, stack_size - 2 * wordSize); + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // Frame is now completed as far as size and linkage. ++ int frame_complete = ((intptr_t)__ pc()) - start; + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // We use x18 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ const Register oop_handle_reg = x18; + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmti, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. 
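The backwards iteration described in the Grand Shuffle comments below can be illustrated with this standalone C++ toy: because each Java arg's C position is at or beyond its own index (the C signature gains JNIEnv* and possibly a class argument in front), copying from the last arg down to the first never clobbers a source that is still needed. The slot array and sentinel values are invented for the illustration.

    #include <cstdio>

    int main() {
        // Six "Java args" packed at the front; the C layout needs them
        // shifted up by two positions to make room for JNIEnv* and (for a
        // static method) the class mirror.
        int slots[8] = { 1, 2, 3, 4, 5, 6, 0, 0 };
        for (int i = 5; i >= 0; i--) {   // last arg first, like the wrapper's shuffle
            slots[i + 2] = slots[i];
        }
        slots[0] = -1;   // pretend JNIEnv*
        slots[1] = -2;   // pretend class mirror
        for (int i = 0; i < 8; i++) {
            printf("%d ", slots[i]);     // -1 -2 1 2 3 4 5 6
        }
        printf("\n");
        return 0;
    }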
+ -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ // ----------------- ++ // The Grand Shuffle + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. ++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ int float_args = 0; ++ int int_args = 0; + -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++#endif /* ASSERT */ + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); ++ // For JNI natives the incoming and outgoing registers are offset upwards. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ assert(c_arg != -1 && i != -1, "wrong order"); ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ object_move(masm, map, oop_handle_offset, 
stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -+ ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ default: ++ move32_64(masm, in_regs[i], 
out_regs[c_arg]); ++ int_args++; ++ } ++ } + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; + -+ format %{ "String Inflate $src,$dst" %} -+ ins_encode %{ -+ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ // Pre-load a static method's oop into c_rarg1. ++ if (method->is_static()) { + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); + -+ format %{ "Encode array $src,$dst,$len -> $result" %} -+ ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_class_memory ); -+%} ++ // Now handlize the static class mirror it's known not-null. ++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; ++ } + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -+ ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). 
++ // We use the same pc/oopMap repeatedly when we call out + -+instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (CountPositives ary len)); -+ effect(USE_KILL ary, USE_KILL len, TEMP tmp); ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); + -+ format %{ "count positives byte[] $ary, $len -> $result" %} -+ ins_encode %{ -+ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ %} ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); ++ } + -+ ins_pipe(pipe_slow); -+%} ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } + -+instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ // Lock a synchronized method + -+ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ // Register definitions used by locking and unlocking + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ false /* isL */); -+ %} ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; + -+ ins_pipe(pipe_class_memory); -+%} ++ Label slow_path_lock; ++ Label lock_done; + -+instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) -+%{ -+ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ if (method->is_synchronized()) { + -+ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + -+ ins_encode %{ -+ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ true /* isL */); -+ %} ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); + -+ ins_pipe(pipe_class_memory); -+%} ++ // Get address of the box + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) 
-+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); -+ %} ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ } + -+ ins_pipe(pipe_class_memory); -+%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -new file mode 100644 -index 00000000000..f85d4b25a76 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2761 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "code/debugInfoRec.hpp" -+#include "code/icBuffer.hpp" -+#include "code/vtableStubs.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "logging/log.hpp" -+#include "memory/resourceArea.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/klass.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/signature.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/align.hpp" -+#include "utilities/formatBuffer.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_Runtime1.hpp" -+#endif -+#ifdef COMPILER2 -+#include "adfiles/ad_riscv.hpp" -+#include "opto/runtime.hpp" -+#endif ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + -+#define __ masm-> ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } + -+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; -+ -+class SimpleRuntimeFrame { -+public: -+ -+ // Most of the runtime stubs have this simple frame layout. -+ // This class exists to make the layout shared in one place. -+ // Offsets are for compiler stack slots, which are jints. -+ enum layout { -+ // The frame sender code expects that fp will be in the "natural" place and -+ // will override any oopMap setting for it. We must therefore force the layout -+ // so that it agrees with the frame sender code. -+ // we don't expect any arg reg save area so riscv asserts that -+ // frame::arg_reg_save_area_bytes == 0 -+ fp_off = 0, fp_off2, -+ return_off, return_off2, -+ framesize -+ }; -+}; ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg + -+class RegisterSaver { -+ const bool _save_vectors; -+ public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ ~RegisterSaver() {} -+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); -+ void restore_live_registers(MacroAssembler* masm); ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); + -+ // Offsets into the register save area -+ // Used by deoptimization when it is managing result register -+ // values on its own -+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. --| -+ // |---v31--|----- -+ // |---f0---| -+ // |---f1---| -+ // | .. 
| -+ // |---f31--| -+ // |---reserved slot for stack alignment---| -+ // |---x5---| -+ // | x6 | -+ // |---.. --| -+ // |---x31--| -+ // |---fp---| -+ // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } -+ int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; -+ } -+ int reserved_slot_offset_in_bytes(void) { -+ return f0_offset_in_bytes() + -+ FloatRegisterImpl::max_slots_per_register * -+ FloatRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); + -+ int reg_offset_in_bytes(Register r) { -+ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); -+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ // Slow path will re-enter here ++ __ bind(lock_done); + } + -+ int freg_offset_in_bytes(FloatRegister f) { -+ return f0_offset_in_bytes() + f->encoding() * wordSize; -+ } + -+ int ra_offset_in_bytes(void) { -+ return reserved_slot_offset_in_bytes() + -+ (RegisterImpl::number_of_registers - 3) * -+ RegisterImpl::max_slots_per_register * -+ BytesPerInt; -+ } -+}; ++ // Finally just about ready to make the JNI call + -+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif ++ // get JNIEnv* which is first argument to native ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + -+ assert_cond(masm != NULL && total_frame_words != NULL); -+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); -+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words -+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; -+ // The caller will allocate additional_frame_words -+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; -+ // CodeBlob frame size is in words. -+ int frame_size_in_words = frame_size_in_bytes / wordSize; -+ *total_frame_words = frame_size_in_words; ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+ // Save Integer, Float and Vector registers. -+ __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ rt_call(masm, native_func); + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. 
++ __ bind(native_return); + -+ OopMapSet *oop_maps = new OopMapSet(); -+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); -+ assert_cond(oop_maps != NULL && oop_map != NULL); ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); + -+ int sp_offset_in_slots = 0; -+ int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } ++ // Unpack native results. ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); + } + -+ step_in_slots = FloatRegisterImpl::max_slots_per_register; -+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ FloatRegister r = as_FloatRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; + -+ step_in_slots = RegisterImpl::max_slots_per_register; -+ // skip the slot reserved for alignment, see MacroAssembler::push_reg; -+ // also skip x5 ~ x6 on the stack because they are caller-saved registers. -+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; -+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. -+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ Register r = as_Register(i); -+ if (r != xthread) { -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); -+ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. 
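
The state transition that follows can be pictured with plain C++ atomics; this is only a sketch of the ordering requirement, with made-up variable names, not HotSpot code. The store of _thread_in_native_trans must become visible before the safepoint/suspend flags are re-read, which is what the AnyAny membar below enforces.

    #include <atomic>
    #include <cstdio>

    enum ThreadState { _thread_in_native, _thread_in_native_trans, _thread_in_Java };

    std::atomic<int>  thread_state{_thread_in_native};
    std::atomic<bool> safepoint_pending{false};  // stands in for the poll word / suspend flags

    // Sketch of the native -> Java transition ordering.
    bool try_transition_to_java() {
      thread_state.store(_thread_in_native_trans, std::memory_order_relaxed);
      // Full fence: the state store must be visible before the flags are read,
      // mirroring __ membar(MacroAssembler::AnyAny) in the generated stub.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      if (safepoint_pending.load(std::memory_order_relaxed)) {
        return false;  // would take the safepoint_in_progress slow path
      }
      thread_state.store(_thread_in_Java, std::memory_order_release);
      return true;
    }

    int main() {
      std::printf("transitioned: %s\n", try_transition_to_java() ? "yes" : "no");
      return 0;
    }
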
++ __ mv(t0, _thread_in_native_trans); ++ ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ __ safepoint_poll_acquire(safepoint_in_progress); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); + } + -+ return oop_map; -+} ++ // change thread state ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); + -+void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+ assert_cond(masm != NULL); -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif -+ __ leave(); -+} ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); + -+// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. -+bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; -+} ++ // native result if any is live + -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { + -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+// --------------------------------------------------------------------------- -+// Read the array of BasicTypes from a signature, and compute where the -+// arguments should go. Values in the VMRegPair regs array refer to 4-byte -+// quantities. Values less than VMRegImpl::stack0 are registers, those above -+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -+// as framesizes are fixed. -+// VMRegImpl::stack0 refers to the first slot 0(sp). -+// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register -+// up to RegisterImpl::number_of_registers) are the 64-bit -+// integer registers. ++ Label done; + -+// Note: the INPUTS in sig_bt are in units of Java argument words, -+// which are 64-bit. The OUTPUTS are in 32-bit units. ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } + -+// The Java calling convention is a "shifted" version of the C ABI. -+// By skipping the first C ABI register we can call non-static jni -+// methods with small numbers of arguments without having to shuffle -+// the arguments at all. Since we control the java ABI we ought to at -+// least get some advantage out of it. ++ // Simple recursive lock? 
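
The removed comment above describes the "shifted" Java calling convention. A small illustrative sketch, assuming the usual RISC-V assignment c_rarg0..c_rarg7 = x10..x17 (the concrete register numbers are an assumption for the example, not taken from this patch):

    #include <cstdio>

    // Model the shift as a rotation: j_rarg[i] corresponds to c_rarg[(i + 1) % 8].
    int main() {
      const char* c_rarg[8] = {"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"};
      for (int i = 0; i < 8; i++) {
        std::printf("j_rarg%d -> %s (c_rarg%d)\n", i, c_rarg[(i + 1) % 8], (i + 1) % 8);
      }
      // With this rotation a non-static JNI call only has to slide each argument over
      // by one register once c_rarg0 is taken by the JNIEnv*.
      return 0;
    }
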
++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + -+int SharedRuntime::java_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ int total_args_passed) { -+ // Create the mapping between argument positions and -+ // registers. -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -+ j_rarg0, j_rarg1, j_rarg2, j_rarg3, -+ j_rarg4, j_rarg5, j_rarg6, j_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { -+ j_farg0, j_farg1, j_farg2, j_farg3, -+ j_farg4, j_farg5, j_farg6, j_farg7 -+ }; ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: -+ // halves of T_LONG or T_DOUBLE -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: -+ if (int_args < Argument::n_int_register_parameters_j) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_j) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ default: -+ ShouldNotReachHere(); ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); + } ++ ++ __ bind(done); + } + -+ return align_up(stk_args, 2); -+} ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); ++ } + -+// Patch the callers callsite with entry to compiled code if it exists. 
-+static void patch_callers_callsite(MacroAssembler *masm) { -+ assert_cond(masm != NULL); -+ Label L; -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, L); ++ __ reset_last_Java_frame(false); + -+ __ enter(); -+ __ push_CPU_state(); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (is_reference_type(ret_type)) { ++ __ resolve_jobject(x10, xthread, t1); ++ } + -+ // VM needs caller's callsite -+ // VM needs target method -+ // This needs to be a long call since we will relocate this adapter to -+ // the codeBuffer and it may not reach ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + -+ __ mv(c_rarg0, xmethod); -+ __ mv(c_rarg1, ra); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); -+ __ jalr(x1, t0, offset); ++ __ leave(); + -+ // Explicit fence.i required because fixup_callers_callsite may change the code -+ // stream. -+ __ safepoint_ifence(); ++ // Any exception pending? ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + -+ __ pop_CPU_state(); -+ // restore sp -+ __ leave(); -+ __ bind(L); -+} ++ // We're done ++ __ ret(); + -+static void gen_c2i_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ Label& skip_fixup) { -+ // Before we get into the guts of the C2I adapter, see if we should be here -+ // at all. We've come from compiled code and are attempting to jump to the -+ // interpreter, which means the caller made a static call to get here -+ // (vcalls always get a compiled target if there is one). Check for a -+ // compiled target. If there is one, we need to patch the caller's call. -+ patch_callers_callsite(masm); ++ // Unexpected paths are out of line and go here + -+ __ bind(skip_fixup); ++ // forward the exception ++ __ bind(exception_pending); + -+ int words_pushed = 0; ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // Since all args are passed on the stack, total_args_passed * -+ // Interpreter::stackElementSize is the space we need. ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { + -+ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); + -+ __ mv(x30, sp); ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) + -+ // stack is aligned, keep it that way -+ extraspace = align_up(extraspace, 2 * wordSize); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); + -+ if (extraspace) { -+ __ sub(sp, sp, extraspace); -+ } ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); + -+ // Now write the args into the outgoing interpreter space -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); + } ++#endif ++ __ j(lock_done); + -+ // offset to start parameters -+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ int next_off = st_off - Interpreter::stackElementSize; ++ __ block_comment("} Slow path lock"); + -+ // Say 4 args: -+ // i st_off -+ // 0 32 T_LONG -+ // 1 24 T_VOID -+ // 2 16 T_OBJECT -+ // 3 8 T_BOOL -+ // - 0 return address -+ // -+ // However to make thing extra confusing. Because we can fit a Java long/double in -+ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter -+ // leaves one slot empty and only stores to a single slot. In this case the -+ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
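
A tiny worked example of the c2i slot arithmetic described in the comment above, assuming Interpreter::stackElementSize is 8 bytes (one slot per 64-bit word): the long's 64-bit payload ends up in the lower-addressed slot, i.e. the slot that formally belongs to its T_VOID half.

    #include <cstdio>

    int main() {
      const int stackElementSize = 8;  // assumed value of Interpreter::stackElementSize
      const char* sig_bt[] = {"T_LONG", "T_VOID", "T_OBJECT", "T_BOOLEAN"};
      const int total_args_passed = 4;

      for (int i = 0; i < total_args_passed; i++) {
        int st_off   = (total_args_passed - i - 1) * stackElementSize;
        int next_off = st_off - stackElementSize;
        std::printf("i=%d %-9s st_off=%2d next_off=%2d\n", i, sig_bt[i], st_off, next_off);
      }
      // The T_LONG at i=0 is stored at next_off (16), the same offset as the st_off
      // of its T_VOID half at i=1 -- "the slot that is occupied is the T_VOID slot".
      return 0;
    }
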
++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ save_native_result(masm, ret_type, stack_slots); + } -+ if (r_1->is_stack()) { -+ // memory to memory use t0 -+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size -+ + extraspace -+ + words_pushed * wordSize); -+ if (!r_2->is_valid()) { -+ __ lwu(t0, Address(sp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } else { -+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); + -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // ld_off == LSW, ld_off+wordSize == MSW -+ // st_off == MSW, next_off == LSW -+ __ sd(t0, Address(sp, next_off), /*temp register*/esp); -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaaaul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ } else { -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+ } -+ } -+ } else if (r_1->is_Register()) { -+ Register r = r_1->as_Register(); -+ if (!r_2->is_valid()) { -+ // must be only an int (or less ) so move only 32bits to slot -+ __ sd(r, Address(sp, st_off)); -+ } else { -+ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG -+ // T_DOUBLE and T_LONG use two slots in the interpreter -+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // long/double in gpr -+#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaabul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ sd(r, Address(sp, next_off)); -+ } else { -+ __ sd(r, Address(sp, st_off)); -+ } -+ } -+ } else { -+ assert(r_1->is_FloatRegister(), ""); -+ if (!r_2->is_valid()) { -+ // only a float use just part of the slot -+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); -+ } else { ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); ++ ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ +#ifdef ASSERT -+ // Overwrite the unused slot with known junk -+ __ li(t0, 0xdeadffffdeadaaacul); -+ __ sd(t0, Address(sp, st_off), /*temp register*/esp); -+#endif /* ASSERT */ -+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); -+ } ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); + } -+ } ++#endif /* ASSERT */ + -+ __ mv(esp, sp); // Interp expects args on caller's expression stack ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); -+ __ jr(t0); -+} ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ j(unlock_done); + -+void 
SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs) { -+ // Cut-out for having no stack args. -+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; -+ if (comp_args_on_stack != 0) { -+ __ sub(t0, sp, comp_words_on_stack * wordSize); -+ __ andi(sp, t0, -16); -+ } ++ __ block_comment("} Slow path unlock"); + -+ // Will jump to the compiled code just as if compiled code was doing it. -+ // Pre-load the register-jump target early, to schedule it better. -+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++ } // synchronized + -+ // Now generate the shuffle code. -+ for (int i = 0; i < total_args_passed; i++) { -+ if (sig_bt[i] == T_VOID) { -+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); -+ continue; -+ } ++ // SLOW PATH Reguard the stack if needed + -+ // Pick up 0, 1 or 2 words from SP+offset. ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); + -+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), -+ "scrambled load targets?"); -+ // Load in argument order going down. -+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; -+ // Point to interpreter value (vs. tag) -+ int next_off = ld_off - Interpreter::stackElementSize; ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); + -+ VMReg r_1 = regs[i].first(); -+ VMReg r_2 = regs[i].second(); -+ if (!r_1->is_valid()) { -+ assert(!r_2->is_valid(), ""); -+ continue; -+ } -+ if (r_1->is_stack()) { -+ // Convert stack slot to an SP offset (+ wordSize to account for return address ) -+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; -+ if (!r_2->is_valid()) { -+ __ lw(t0, Address(esp, ld_off)); -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } else { -+ // -+ // We are using two optoregs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. -+ // -+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals -+ // are accessed as negative so LSW is at LOW address ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ __ jalr(x1, t0, offset); + -+ // ld_off is MSW so get LSW -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; -+ __ ld(t0, Address(esp, offset)); -+ // st_off is LSW (i.e. 
reg.first()) -+ __ sd(t0, Address(sp, st_off), /*temp register*/t2); -+ } -+ } else if (r_1->is_Register()) { // Register argument -+ Register r = r_1->as_Register(); -+ if (r_2->is_valid()) { -+ // -+ // We are using two VMRegs. This can be either T_OBJECT, -+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates -+ // two slots but only uses one for thr T_LONG or T_DOUBLE case -+ // So we must adjust where to pick up the data to match the -+ // interpreter. ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); + -+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? -+ next_off : ld_off; ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); ++ } + -+ // this can be a misaligned move -+ __ ld(r, Address(esp, offset)); -+ } else { -+ // sign extend and use a full word? -+ __ lw(r, Address(esp, ld_off)); -+ } -+ } else { -+ if (!r_2->is_valid()) { -+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); -+ } else { -+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); -+ } -+ } ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); + } + -+ // 6243940 We might end up in handle_wrong_method if -+ // the callee is deoptimized as we race thru here. If that -+ // happens we don't want to take a safepoint because the -+ // caller frame will look interpreted and arguments are now -+ // "compiled" so it is much better to make this transition -+ // invisible to the stack walking code. Unfortunately if -+ // we try and find the callee by normal means a safepoint -+ // is possible. So we stash the desired callee in the thread -+ // and the vm will find there should this case occur. ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); ++ } + -+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++ __ flush(); + -+ __ jr(t1); ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ return nm; +} + -+// --------------------------------------------------------------- -+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, -+ int total_args_passed, -+ int comp_args_on_stack, -+ const BasicType *sig_bt, -+ const VMRegPair *regs, -+ AdapterFingerPrint* fingerprint) { -+ address i2c_entry = __ pc(); -+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals ++ } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); ++} + -+ address c2i_unverified_entry = __ pc(); -+ Label skip_fixup; ++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver; + -+ Label ok; ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. ++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. 
Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; + -+ const Register holder = t1; -+ const Register receiver = j_rarg0; -+ const Register tmp = t2; // A call-clobbered register not used for arg passing ++ // Prolog for non exception case! + -+ // ------------------------------------------------------------------------- -+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls -+ // to the interpreter. The args start out packed in the compiled layout. They -+ // need to be unpacked into the interpreter layout. This will almost always -+ // require some stack space. We grow the current (compiled) stack, then repack -+ // the args. We finally end in a jump to the generic interpreter entry point. -+ // On exit from the interpreter, the interpreter will restore our SP (lest the -+ // compiled code, which relys solely on SP and not FP, get sick). ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ { -+ __ block_comment("c2i_unverified_entry {"); -+ __ load_klass(t0, receiver); -+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); -+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); -+ __ beq(t0, tmp, ok); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); + -+ __ bind(ok); -+ // Method might have been compiled since the call site was patched to -+ // interpreted; if that is the case treat it as a miss so we can get -+ // the call site corrected. -+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); -+ __ beqz(t0, skip_fixup); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); -+ __ block_comment("} c2i_unverified_entry"); -+ } ++ int reexecute_offset = __ pc() - start; + -+ address c2i_entry = __ pc(); ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at + -+ // Class initialization barrier for static methods -+ address c2i_no_clinit_check_entry = NULL; -+ if (VM_Version::supports_fast_class_init_checks()) { -+ Label L_skip_barrier; ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ { // Bypass the barrier for non-static methods -+ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t1, t0, JVM_ACC_STATIC); -+ __ beqz(t1, L_skip_barrier); // non-static -+ } ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); + -+ __ load_method_holder(t1, xmethod); -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ int exception_offset = __ pc() - start; + -+ __ bind(L_skip_barrier); -+ c2i_no_clinit_check_entry = __ pc(); -+ } ++ // Prolog for exception case + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->c2i_entry_barrier(masm); ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
+ -+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); + -+ __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+} ++ int exception_in_tls_offset = __ pc() - start; + -+int SharedRuntime::vector_calling_convention(VMRegPair *regs, -+ uint num_bits, -+ uint total_args_passed) { -+ Unimplemented(); -+ return 0; -+} ++ // new implementation because exception oop is now passed in JavaThread + -+int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -+ VMRegPair *regs, -+ VMRegPair *regs2, -+ int total_args_passed) { -+ assert(regs2 == NULL, "not needed on riscv"); ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) + -+ // We return the amount of VMRegImpl stack slots we need to reserve for all -+ // the arguments NOT counting out_preserve_stack_slots. ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. + -+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, -+ c_rarg4, c_rarg5, c_rarg6, c_rarg7 -+ }; -+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { -+ c_farg0, c_farg1, c_farg2, c_farg3, -+ c_farg4, c_farg5, c_farg6, c_farg7 -+ }; ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. + -+ uint int_args = 0; -+ uint fp_args = 0; -+ uint stk_args = 0; // inc by 2 each time ++ // Save everything in sight. 
++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ for (int i = 0; i < total_args_passed; i++) { -+ switch (sig_bt[i]) { -+ case T_BOOLEAN: // fall through -+ case T_CHAR: // fall through -+ case T_BYTE: // fall through -+ case T_SHORT: // fall through -+ case T_INT: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_LONG: // fall through -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ case T_OBJECT: // fall through -+ case T_ARRAY: // fall through -+ case T_ADDRESS: // fall through -+ case T_METADATA: -+ if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_FLOAT: -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set1(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_DOUBLE: -+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); -+ if (fp_args < Argument::n_float_register_parameters_c) { -+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); -+ } else if (int_args < Argument::n_int_register_parameters_c) { -+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); -+ } else { -+ regs[i].set2(VMRegImpl::stack2reg(stk_args)); -+ stk_args += 2; -+ } -+ break; -+ case T_VOID: // Halves of longs and doubles -+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); -+ regs[i].set_bad(); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ } ++ // Now it is safe to overwrite any register + -+ return stk_args; -+} ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved + -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); -+ } -+ } -+} ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread + -+// An oop arg. 
Must pass a handle not the oop itself -+static void object_move(MacroAssembler* masm, -+ OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+ // See if oop is NULL if it is we need no handle ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); + -+ if (src.first()->is_stack()) { ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif + -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } ++ __ bind(cont); + -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ __ bnez(t0, notZero1); -+ __ mv(rHandle, zr); -+ __ bind(notZero1); -+ } else { ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++ // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) + -+ // Oop is in an a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ // fetch_unroll_info needs to call last_java_frame(). + -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. 
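// Illustrative aside (not part of the patch): object_move above never hands
// the raw oop to native code; it passes a handle, i.e. the address of a slot
// that holds the oop (so the GC can update the slot), and a NULL oop becomes
// a NULL handle rather than a pointer to a NULL slot. A minimal standalone
// sketch of that rule, with invented types in place of VMRegPair/OopMap:
#include <cassert>

using Oop = void*;      // stand-in for a Java object pointer
using Handle = Oop*;    // stand-in for a handle (pointer to an oop slot)

// Store the oop into a caller-reserved slot and return the handle to pass.
static Handle handlize(Oop obj, Oop* reserved_slot) {
  *reserved_slot = obj;                        // the slot is what the oop map records
  return obj == nullptr ? nullptr : reserved_slot;
}

int main() {
  int dummy_object = 42;
  Oop slot = nullptr;

  Handle h = handlize(&dummy_object, &slot);
  assert(h != nullptr && *h == &dummy_object); // non-NULL oop: handle -> slot -> oop

  Handle h0 = handlize(nullptr, &slot);
  assert(h0 == nullptr);                       // NULL oop: NULL handle, nothing to dereference
  return 0;
}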
++ oop_maps->add_gc_map(__ pc() - start, map); + -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ __ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } ++ __ reset_last_Java_frame(false); + -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ __ beqz(rOop, isZero); -+ __ la(rHandle, Address(sp, offset)); -+ __ bind(isZero); -+ } else { -+ Label notZero2; -+ __ la(rHandle, Address(sp, offset)); -+ __ bnez(rOop, notZero2); -+ __ mv(rHandle, zr); -+ __ bind(notZero2); -+ } -+ } ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); + -+ // If arg is on the stack then place it otherwise it is already in correct reg. -+ if (dst.first()->is_stack()) { -+ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ mv(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); + -+// A float arg may have to do float reg int reg conversion -+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} ++ __ verify_oop(x10); + -+// A long move -+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ __ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} ++ // Overwrite the result registers with the exception results. 
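// Illustrative aside (not part of the patch): the exception path just above
// reads exception_oop/exception_pc out of the thread-local fields and then
// immediately zeroes both, so the pending exception is consumed exactly once
// and the GC no longer sees the oop as a root. A standalone sketch of that
// "take and clear" pattern; JThread is an invented stand-in for JavaThread.
#include <cassert>
#include <cstdint>

struct JThread {
  void*     exception_oop = nullptr;
  uintptr_t exception_pc  = 0;
};

struct PendingException { void* oop; uintptr_t pc; };

static PendingException take_pending_exception(JThread* t) {
  PendingException e{ t->exception_oop, t->exception_pc };
  t->exception_oop = nullptr;   // mirrors: sd zr, exception_oop_offset
  t->exception_pc  = 0;         // mirrors: sd zr, exception_pc_offset
  return e;
}

int main() {
  int obj = 0;
  JThread t;
  t.exception_oop = &obj;
  t.exception_pc  = 0x1234;

  PendingException e = take_pending_exception(&t);
  assert(e.oop == &obj && e.pc == 0x1234);
  assert(t.exception_oop == nullptr && t.exception_pc == 0);   // fields cleared
  return 0;
}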
++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+// A double move -+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ assert_cond(masm != NULL); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ __ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} ++ __ bind(noException); + -+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ fsw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fsd(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ sd(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. + -+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { -+ assert_cond(masm != NULL); -+ // We always ignore the frame_slots arg and just use the space just below frame pointer -+ // which by this time is free to use -+ switch (ret_type) { -+ case T_FLOAT: -+ __ flw(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_DOUBLE: -+ __ fld(f10, Address(fp, -3 * wordSize)); -+ break; -+ case T_VOID: break; -+ default: { -+ __ ld(x10, Address(fp, -3 * wordSize)); -+ } -+ } -+} ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ addi(sp, sp, -2 * wordSize); -+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); -+ } -+ } -+ __ push_reg(x, sp); -+} ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + -+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { -+ assert_cond(masm != NULL && args != NULL); -+ RegSet x; -+ for ( int i = first_arg ; i < arg_count ; i++ ) { -+ if (args[i].first()->is_Register()) { -+ x = x + args[i].first()->as_Register(); -+ } else { -+ ; -+ } -+ } -+ __ pop_reg(x, sp); -+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { -+ if (args[i].first()->is_Register()) { -+ ; -+ } else if (args[i].first()->is_FloatRegister()) { -+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); 
-+ __ add(sp, sp, 2 * wordSize); -+ } -+ } -+} ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. + -+static void rt_call(MacroAssembler* masm, address dest) { -+ assert_cond(masm != NULL); -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ __ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(dest), offset); -+ __ jalr(x1, t0, offset); -+ } -+} ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. + -+static void verify_oop_args(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ const Register temp_reg = x9; // not part of any compiled calling seq -+ if (VerifyOops) { -+ for (int i = 0; i < method->size_of_parameters(); i++) { -+ if (sig_bt[i] == T_OBJECT || -+ sig_bt[i] == T_ARRAY) { -+ VMReg r = regs[i].first(); -+ assert(r->is_valid(), "bad oop arg"); -+ if (r->is_stack()) { -+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ __ verify_oop(temp_reg); -+ } else { -+ __ verify_oop(r->as_Register()); -+ } -+ } -+ } -+ } -+} ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) + -+static void gen_special_dispatch(MacroAssembler* masm, -+ const methodHandle& method, -+ const BasicType* sig_bt, -+ const VMRegPair* regs) { -+ verify_oop_args(masm, method, sig_bt, regs); -+ vmIntrinsics::ID iid = method->intrinsic_id(); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); ++#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ // Now write the args into the outgoing interpreter space -+ bool has_receiver = false; -+ Register receiver_reg = noreg; -+ int member_arg_pos = -1; -+ Register member_reg = noreg; -+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); -+ if (ref_kind != 0) { -+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -+ member_reg = x9; // known to be free at this point -+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ has_receiver = true; -+ } else { -+ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ } ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + -+ if (member_reg != noreg) { -+ // Load the member_arg into register, if necessary. 
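// Illustrative aside (not part of the patch): the "push interpreter frames in
// a loop" step earlier in the deopt blob walks the UnrollBlock's frame_sizes
// and frame_pcs arrays, carving one skeletal interpreter frame per entry and
// recording the previous frame's sp as sender_sp. A standalone model of that
// bookkeeping; UnrollInfo and Frame are invented stand-ins, and each size is
// treated as the frame's total byte count (the hand-pushed ra/fp included).
#include <cstdint>
#include <cstdio>
#include <vector>

struct UnrollInfo {
  std::vector<uint64_t> frame_sizes;   // bytes per skeletal frame
  std::vector<uint64_t> frame_pcs;     // return pc per frame
  uint64_t caller_adjustment;          // extra bytes added to the caller's frame
};

struct Frame { uint64_t sp, pc, sender_sp; };

static std::vector<Frame> build_frames(uint64_t sp, const UnrollInfo& info) {
  std::vector<Frame> frames;
  uint64_t sender_sp = sp;             // recorded before the caller adjustment, as above
  sp -= info.caller_adjustment;        // grow the caller's frame for extra locals
  for (size_t i = 0; i < info.frame_sizes.size(); i++) {
    sp -= info.frame_sizes[i];         // net effect of enter() plus the prolog sub
    frames.push_back({ sp, info.frame_pcs[i], sender_sp });
    frames.back().sender_sp = sender_sp;
    sender_sp = sp;                    // next frame's sender is this frame's sp
  }
  return frames;
}

int main() {
  UnrollInfo info{ { 96, 128 }, { 0x1000, 0x2000 }, 32 };
  for (const Frame& f : build_frames(0x7fff0000ULL, info)) {
    std::printf("frame sp=%#llx pc=%#llx sender_sp=%#llx\n",
                (unsigned long long)f.sp, (unsigned long long)f.pc,
                (unsigned long long)f.sender_sp);
  }
  return 0;
}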
-+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); -+ VMReg r = regs[member_arg_pos].first(); -+ if (r->is_stack()) { -+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ member_reg = r->as_Register(); -+ } -+ } ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + -+ if (has_receiver) { -+ // Make sure the receiver is loaded into a register. -+ assert(method->size_of_parameters() > 0, "oob"); -+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); -+ VMReg r = regs[0].first(); -+ assert(r->is_valid(), "bad receiver arg"); -+ if (r->is_stack()) { -+ // Porting note: This assumes that compiled calling conventions always -+ // pass the receiver oop in a register. If this is not true on some -+ // platform, pick a temp and load the receiver from stack. -+ fatal("receiver always in a register"); -+ receiver_reg = x12; // known to be free at this point -+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); -+ } else { -+ // no data motion is needed -+ receiver_reg = r->as_Register(); -+ } -+ } ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. + -+ // Figure out which address we are really jumping to: -+ MethodHandles::generate_method_handle_dispatch(masm, iid, -+ receiver_reg, member_reg, /*for_compiler_entry:*/ true); -+} ++ const Register sender_sp = x16; + -+// --------------------------------------------------------------------------- -+// Generate a native wrapper for a given method. The method takes arguments -+// in the Java compiled code convention, marshals them to the native -+// convention (handlizes oops, etc), transitions to native, makes the call, -+// returns to java state (possibly blocking), unhandlizes any result and -+// returns. -+// -+// Critical native functions are a shorthand for the use of -+// GetPrimtiveArrayCritical and disallow the use of any other JNI -+// functions. The wrapper is expected to unpack the arguments before -+// passing them to the callee and perform checks before and after the -+// native call to ensure that they GCLocker -+// lock_critical/unlock_critical semantics are followed. Some other -+// parts of JNI setup are skipped like the tear down of the JNI handle -+// block and the check for pending exceptions it's impossible for them -+// to be thrown. -+// -+// They are roughly structured like this: -+// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() -+// tranistion to thread_in_native -+// unpack arrray arguments and call native entry point -+// check for safepoint in progress -+// check if any thread suspend flags are set -+// call into JVM and possible unlock the JNI critical -+// if a GC was suppressed while in the critical native. 
-+// transition back to thread_in_Java -+// return to caller -+// -+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, -+ const methodHandle& method, -+ int compile_id, -+ BasicType* in_sig_bt, -+ VMRegPair* in_regs, -+ BasicType ret_type) { -+ if (method->is_method_handle_intrinsic()) { -+ vmIntrinsics::ID iid = method->intrinsic_id(); -+ intptr_t start = (intptr_t)__ pc(); -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); + -+ // First instruction must be a nop as it may need to be patched on deoptimisation -+ __ nop(); -+ gen_special_dispatch(masm, -+ method, -+ in_sig_bt, -+ in_regs); -+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period -+ __ flush(); -+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually -+ return nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ in_ByteSize(-1), -+ in_ByteSize(-1), -+ (OopMapSet*)NULL); -+ } -+ address native_func = method->native_function(); -+ assert(native_func != NULL, "must have function"); ++ // Push interpreter frames in a loop ++ __ mv(t0, 0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // Decrement counter ++ __ bnez(x13, loop); + -+ // An OopMap for lock (and class if static) -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ intptr_t start = (intptr_t)__ pc(); ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); + -+ // We have received a description of where all the java arg are located -+ // on entry to the wrapper. We need to convert these args to where -+ // the jni function will expect them. To figure out where they go -+ // we convert the java signature to a C signature by inserting -+ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); + -+ const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); -+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ BasicType* in_elem_bt = NULL; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. 
++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + -+ int argc = 0; -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // Now figure out where the args must be stored and how much stack space -+ // they require. -+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap(frame_size_in_words, 0)); + -+ // Compute framesize for the wrapper. We need to handlize all oops in -+ // incoming registers ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ // Calculate the total number of stack slots we will need. ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); + -+ // First count the abi requirement plus all of the outgoing args -+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ // Pop self-frame. ++ __ leave(); // Epilog + -+ // Now the space for the inbound oop handle area -+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers ++ // Jump to interpreter ++ __ ret(); + -+ int oop_handle_offset = stack_slots; -+ stack_slots += total_save_slots; ++ // Make sure all code is generated ++ masm->flush(); + -+ // Now any space we need for handlizing a klass if static method ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} + -+ int klass_slot_offset = 0; -+ int klass_offset = -1; -+ int lock_slot_offset = 0; -+ bool is_static = false; ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} + -+ if (method->is_static()) { -+ klass_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; -+ is_static = true; -+ } ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ // Plus a lock if needed ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ if (method->is_synchronized()) { -+ lock_slot_offset = stack_slots; -+ stack_slots += VMRegImpl::slots_per_word; -+ } ++ address start = __ pc(); + -+ // Now a place (+2) to save return values or temp during shuffling -+ // + 4 for return address (which we own) and saved fp -+ stack_slots += 6; ++ // Push self-frame. 
We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); + -+ // Ok The space we have allocated will look like: -+ // -+ // -+ // FP-> | | -+ // | 2 slots (ra) | -+ // | 2 slots (fp) | -+ // |---------------------| -+ // | 2 slots for moves | -+ // |---------------------| -+ // | lock box (if sync) | -+ // |---------------------| <- lock_slot_offset -+ // | klass (if static) | -+ // |---------------------| <- klass_slot_offset -+ // | oopHandle area | -+ // |---------------------| <- oop_handle_offset (8 java arg registers) -+ // | outbound memory | -+ // | based arguments | -+ // | | -+ // |---------------------| -+ // | | -+ // SP-> | out_preserved_slots | ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. + // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) + // ++ // n.b. 3 gp args, 0 fp args, integral return type + ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ // Now compute actual number of stack words we need rounding to make -+ // stack properly aligned. -+ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); + -+ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ // location of fp is known implicitly by the frame sender code + -+ // First thing make an ic check to see if we should even be here ++ oop_maps->add_gc_map(__ pc() - start, map); + -+ // We are free to use all registers as temps without saving them and -+ // restoring them except fp. fp is the only callee save register -+ // as far as the interpreter and the compiler(s) are concerned. ++ __ reset_last_Java_frame(false); + ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); + -+ const Register ic_reg = t1; -+ const Register receiver = j_rarg0; ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif + -+ Label hit; -+ Label exception_pending; ++ // Pop all the frames we must move/replace. 
++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). + -+ assert_different_registers(ic_reg, receiver, t0); -+ __ verify_oop(receiver); -+ __ cmp_klass(receiver, ic_reg, t0, hit); ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! + -+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame + -+ // Verified entry point must be aligned -+ __ align(8); ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++#endif + -+ __ bind(hit); ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + -+ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); + -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. -+ __ nop(); ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) + -+ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -+ Label L_skip_barrier; -+ __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -+ __ clinit_barrier(t1, t0, &L_skip_barrier); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. + -+ __ bind(L_skip_barrier); -+ } ++ const Register sender_sp = t1; // temporary register + -+ // Generate stack overflow check -+ __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); + -+ // Generate a new frame for the wrapper. 
-+ __ enter(); -+ // -2 because return address is already present and so is saved fp -+ __ sub(sp, sp, stack_size - 2 * wordSize); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp + -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ assert_cond(bs != NULL); -+ bs->nmethod_entry_barrier(masm); ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ // Frame is now completed as far as size and linkage. -+ int frame_complete = ((intptr_t)__ pc()) - start; ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // + -+ // We use x18 as the oop handle for the receiver/klass -+ // It is callee save so it survives the call to native ++ // n.b. 2 gp args, 0 fp args, integral return type + -+ const Register oop_handle_reg = x18; ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); + -+ // -+ // We immediately shuffle the arguments so that any vm call we have to -+ // make from here on out (sync slow path, jvmti, etc.) we will have -+ // captured the oops from our caller and have a valid oopMap for -+ // them. ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // ----------------- -+ // The Grand Shuffle ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); + -+ // The Java calling convention is either equal (linux) or denser (win64) than the -+ // c calling convention. However the because of the jni_env argument the c calling -+ // convention always has at least one more (and two for static) arguments than Java. -+ // Therefore if we move the args from java -> c backwards then we will never have -+ // a register->register conflict and we don't have to build a dependency graph -+ // and figure out how to break any cycles. -+ // ++ // Pop self-frame. 
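// Illustrative aside (not part of the patch): the removed native-wrapper code
// above rounds the accumulated slot count up to the stack alignment and then
// converts slots to bytes. The constants below assume 4-byte VMReg stack
// slots and a 16-byte aligned RISC-V frame (i.e. StackAlignmentInSlots == 4);
// they are stated here only to make the arithmetic concrete.
#include <cassert>

constexpr int kStackSlotSize       = 4;                    // bytes per VMReg stack slot (assumed)
constexpr int kStackAlignmentSlots = 16 / kStackSlotSize;  // assumed 16-byte stack alignment

constexpr int align_up(int value, int alignment) {         // alignment must be a power of two
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  int stack_slots = 37;                                    // e.g. out args + handles + lock + scratch
  stack_slots = align_up(stack_slots, kStackAlignmentSlots);
  int stack_size = stack_slots * kStackSlotSize;           // bytes reserved below the saved fp/ra
  assert(stack_slots == 40 && stack_size == 160);
  assert(stack_size % 16 == 0);                            // the frame stays 16-byte aligned
  return 0;
}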
++ __ leave(); // Epilog + -+ // Record esp-based slot for receiver on stack for non-static methods -+ int receiver_offset = -1; ++ // Jump to interpreter ++ __ ret(); + -+ // This is a trick. We double the stack slots so we can claim -+ // the oops in the caller's frame. Since we are sure to have -+ // more args than the caller doubling is enough to make -+ // sure we can capture all the incoming oop args from the -+ // caller. -+ // -+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); -+ assert_cond(map != NULL); ++ // Make sure all code is generated ++ masm->flush(); + -+ int float_args = 0; -+ int int_args = 0; ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 + -+#ifdef ASSERT -+ bool reg_destroyed[RegisterImpl::number_of_registers]; -+ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; -+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { -+ reg_destroyed[r] = false; -+ } -+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { -+ freg_destroyed[f] = false; -+ } ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+#endif /* ASSERT */ ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ // For JNI natives the incoming and outgoing registers are offset upwards. -+ GrowableArray arg_order(2 * total_in_args); -+ VMRegPair tmp_vmreg; -+ tmp_vmreg.set2(x9->as_VMReg()); ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver; + -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ int temploc = -1; -+ for (int ai = 0; ai < arg_order.length(); ai += 2) { -+ int i = arg_order.at(ai); -+ int c_arg = arg_order.at(ai + 1); -+ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ assert(c_arg != -1 && i != -1, "wrong order"); -+#ifdef ASSERT -+ if (in_regs[i].first()->is_Register()) { -+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); -+ } -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif /* ASSERT */ -+ switch (in_sig_bt[i]) { -+ case T_ARRAY: -+ case T_OBJECT: -+ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); -+ int_args++; -+ break; -+ case T_VOID: -+ break; ++ // The following is basically a call_VM. 
However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. + -+ case T_FLOAT: -+ float_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ case T_DOUBLE: -+ assert( i + 1 < total_in_args && -+ in_sig_bt[i + 1] == T_VOID && -+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ double_move(masm, in_regs[i], out_regs[c_arg]); -+ float_args++; -+ break; ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. + -+ case T_LONG : -+ long_move(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ break; ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ } + -+ case T_ADDRESS: -+ assert(false, "found T_ADDRESS in java args"); -+ break; ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + -+ default: -+ move32_64(masm, in_regs[i], out_regs[c_arg]); -+ int_args++; -+ } -+ } ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. + -+ // point c_arg at the first arg that is already loaded in case we -+ // need to spill before we call out -+ int c_arg = total_c_args - total_in_args; ++ oop_maps->add_gc_map( __ pc() - start, map); + -+ // Pre-load a static method's oop into c_rarg1. -+ if (method->is_static()) { ++ Label noException; + -+ // load oop into a register -+ __ movoop(c_rarg1, -+ JNIHandles::make_local(method->method_holder()->java_mirror()), -+ /*immediate*/true); ++ __ reset_last_Java_frame(false); + -+ // Now handlize the static class mirror it's known not-null. -+ __ sd(c_rarg1, Address(sp, klass_offset)); -+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+ // Now get the handle -+ __ la(c_rarg1, Address(sp, klass_offset)); -+ // and protect the arg if we must spill -+ c_arg--; -+ } ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); + -+ // Change state to native (we save the return address in the thread, since it might not -+ // be pushed on the stack when we do a stack traversal). 
-+ // We use the same pc/oopMap repeatedly when we call out ++ // Exception pending + -+ Label native_return; -+ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ reg_saver.restore_live_registers(masm); + -+ Label dtrace_method_entry, dtrace_method_entry_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ addw(t0, t0, zr); -+ __ bnez(t0, dtrace_method_entry); -+ __ bind(dtrace_method_entry_done); -+ } ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ // RedefineClasses() tracing support for obsolete method entry -+ if (log_is_enabled(Trace, redefine, class, obsolete)) { -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); + } + -+ // Lock a synchronized method ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. + -+ // Register definitions used by locking and unlocking ++ reg_saver.restore_live_registers(masm); ++ __ ret(); + -+ const Register swap_reg = x10; -+ const Register obj_reg = x9; // Will contain the oop -+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) -+ const Register old_hdr = x30; // value of old header at unlock time -+ const Register tmp = ra; ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif + -+ Label slow_path_lock; -+ Label lock_done; ++ // Make sure all code is generated ++ masm->flush(); + -+ if (method->is_synchronized()) { ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} + -+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. 
++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + -+ // Get the handle (the 2nd argument) -+ __ mv(oop_handle_reg, c_rarg1); ++ // allocate space for the code ++ ResourceMark rm; + -+ // Get address of the box ++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + -+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver; + -+ // Load the oop from the handle -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; + -+ if (!UseHeavyMonitors) { -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); ++ int start = __ offset(); + -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); + -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -+ } ++ int frame_complete = __ offset(); + -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. -+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ } else { -+ __ j(slow_path_lock); -+ } ++ { ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); + -+ // Slow path will re-enter here -+ __ bind(lock_done); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(destination), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); + } + ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. 
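// Illustrative aside (not part of the patch): the ASSERT block in the
// safepoint handler blob a little earlier verifies that the instruction at
// the stashed return pc is "lwu zr, imm(rs1)" before stepping over it --
// LOAD opcode 0b0000011 in bits 0-6, rd == x0 in bits 7-11, and
// funct3 == 0b110 (LWU) in bits 12-14. A standalone decoder of those same
// fields; encode_lwu is only a helper for the self-test, not HotSpot code.
#include <cassert>
#include <cstdint>

static bool is_lwu_to_zr(uint32_t insn) {
  const uint32_t opcode = insn & 0x7f;          // bits 0-6
  const uint32_t rd     = (insn >> 7) & 0x1f;   // bits 7-11
  const uint32_t funct3 = (insn >> 12) & 0x7;   // bits 12-14
  return opcode == 0b0000011u && rd == 0 && funct3 == 0b110u;
}

// Assemble an I-type "lwu rd, imm(rs1)" word for the test below.
static uint32_t encode_lwu(uint32_t rd, uint32_t rs1, int32_t imm) {
  return (((uint32_t)imm & 0xfff) << 20) | (rs1 << 15) | (0b110u << 12) | (rd << 7) | 0b0000011u;
}

int main() {
  assert(is_lwu_to_zr(encode_lwu(/*rd=*/0, /*rs1=*/9, /*imm=*/0)));    // lwu zr, 0(x9): a poll
  assert(!is_lwu_to_zr(encode_lwu(/*rd=*/5, /*rs1=*/9, /*imm=*/0)));   // result not discarded: not a poll
  return 0;
}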
+ -+ // Finally just about ready to make the JNI call ++ oop_maps->add_gc_map( __ offset() - start, map); + -+ // get JNIEnv* which is first argument to native -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ // x10 contains the address we are going to jump to assuming no exception got installed + -+ // Now set thread in native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); + -+ rt_call(masm, native_func); ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); + -+ __ bind(native_return); ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); + -+ intptr_t return_pc = (intptr_t) __ pc(); -+ oop_maps->add_gc_map(return_pc - start, map); ++ // We are back the the original state on entry and ready to go. + -+ // Unpack native results. -+ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { -+ __ cast_primitive_type(ret_type, x10); -+ } ++ __ jr(t0); + -+ Label safepoint_in_progress, safepoint_in_progress_done; -+ Label after_transition; ++ // Pending exception after the safepoint + -+ // Switch thread to "native transition" state before reading the synchronization state. -+ // This additional state is necessary because reading and testing the synchronization -+ // state is not atomic w.r.t. GC, as this scenario demonstrates: -+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. -+ // VM thread changes sync state to synchronizing and suspends threads for GC. -+ // Thread A is resumed to finish this native method, but doesn't block here since it -+ // didn't see any synchronization is progress, and escapes. -+ __ mv(t0, _thread_in_native_trans); ++ __ bind(pending); + -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ reg_saver.restore_live_registers(masm); + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); ++ // exception pending => remove activation and forward to exception handler + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. 
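// Illustrative aside (not part of the patch): the removed comments above
// describe why the store of _thread_in_native_trans must be globally visible
// before the safepoint poll is read -- otherwise a thread returning from
// native could miss a safepoint that starts between the two accesses. A
// conceptual two-variable model of that ordering using standard atomics;
// the names and single-threaded smoke test are illustrative only.
#include <atomic>
#include <cassert>

enum ThreadState { in_native = 0, in_native_trans = 1, in_Java = 2 };

std::atomic<int>  thread_state{ in_native };
std::atomic<bool> poll_armed{ false };     // stand-in for the thread-local poll word

// Returns true if the returning thread must block for the safepoint.
static bool transition_from_native() {
  thread_state.store(in_native_trans, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);  // "force this write out before the read below"
  bool must_block = poll_armed.load(std::memory_order_acquire);
  if (!must_block) {
    thread_state.store(in_Java, std::memory_order_release);
  }
  return must_block;
}

int main() {
  assert(!transition_from_native());       // no safepoint armed: proceed to in_Java
  assert(thread_state.load() == in_Java);
  return 0;
}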
-+ -+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ bnez(t0, safepoint_in_progress); -+ __ bind(safepoint_in_progress_done); -+ } ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); + -+ // change thread state -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_Java); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ __ bind(after_transition); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ Label reguard; -+ Label reguard_done; -+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ beq(t0, t1, reguard); -+ __ bind(reguard_done); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); + -+ // native result if any is live ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); ++} + -+ // Unlock -+ Label unlock_done; -+ Label slow_path_unlock; -+ if (method->is_synchronized()) { ++#ifdef COMPILER2 ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. ++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// + -+ // Get locked oop from the handle we passed to jni -+ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); + -+ Label done; ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + -+ if (!UseHeavyMonitors) { -+ // Simple recursive lock? 
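// Illustrative aside (not part of the patch): the removed fast-lock comments
// above fold three checks into one expression -- with a power-of-two page
// size, ((mark - sp) & (3 - page_size)) == 0 holds exactly when the low two
// bits of (mark - sp) are clear and mark lies in [sp, sp + page_size). The
// standalone self-test below brute-forces that identity for a sample sp.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t page_size = 4096;          // assumed power-of-two page size
  const uint64_t sp = 0x3ffff000ULL;        // some word-aligned stack pointer
  for (uint64_t delta = 0; delta < 2 * page_size; delta += 2) {
    uint64_t mark = sp + delta;
    bool fused    = ((mark - sp) & (3 - page_size)) == 0;
    bool expanded = ((mark & 3) == 0) && (mark >= sp) && (mark < sp + page_size);
    assert(fused == expanded);
  }
  uint64_t below = sp - 64;                 // a mark below sp must also fail the fused test
  assert(((below - sp) & (3 - page_size)) != 0);
  return 0;
}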
-+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); -+ } ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("exception_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); + ++ // TODO check various assumptions made here ++ // ++ // make sure we do so before running this + -+ // Must save x10 if if it is live now because cmpxchg must use it -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++ address start = __ pc(); + -+ if (!UseHeavyMonitors) { -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); ++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); + -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); -+ } else { -+ __ j(slow_path_unlock); -+ } ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ // ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) ++ // ++ // n.b. 1 gp arg, 0 fp args, integral return type + -+ // slow path re-enters here -+ __ bind(unlock_done); -+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); + -+ __ bind(done); -+ } + -+ Label dtrace_method_exit, dtrace_method_exit_done; -+ { -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); -+ __ lbu(t0, Address(t0, offset)); -+ __ bnez(t0, dtrace_method_exit); -+ __ bind(dtrace_method_exit_done); -+ } ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. + -+ __ reset_last_Java_frame(false); ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. + -+ // Unbox oop result, e.g. JNIHandles::resolve result. 
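// Illustrative aside (not part of the patch): the "unbox oop result" step
// right after this comment undoes the handlizing shown earlier -- a native
// method returning an object really returns a handle (a pointer to a slot
// holding the oop), and resolving it means loading the oop back from that
// slot, with a NULL handle mapping back to NULL. The types below are
// invented stand-ins, not HotSpot's JNIHandles implementation.
#include <cassert>

using Oop = void*;
using JObjectHandle = Oop*;     // stand-in for a jobject handle

static Oop resolve_handle(JObjectHandle handle) {
  return handle == nullptr ? nullptr : *handle;
}

int main() {
  int obj = 7;
  Oop slot = &obj;
  assert(resolve_handle(&slot) == &obj);      // non-NULL handle: load the oop from the slot
  assert(resolve_handle(nullptr) == nullptr); // NULL handle: NULL result, no dereference
  return 0;
}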
-+ if (is_reference_type(ret_type)) { -+ __ resolve_jobject(x10, xthread, t1); -+ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); -+ } ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); ++ __ reset_last_Java_frame(false); + -+ __ leave(); ++ // Restore callee-saved registers + -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); + -+ // We're done -+ __ ret(); ++ // x10: exception handler + -+ // Unexpected paths are out of line and go here ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); + -+ // forward the exception -+ __ bind(exception_pending); ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler + -+ // Slow path locking & unlocking -+ if (method->is_synchronized()) { ++ __ jr(t0); + -+ __ block_comment("Slow path lock {"); -+ __ bind(slow_path_lock); ++ // Make sure all code is generated ++ masm->flush(); + -+ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM -+ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 0000000000..272dd9aeb3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3743 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // protect the args we've loaded -+ save_args(masm, total_c_args, c_arg, out_regs); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + -+ __ mv(c_rarg0, obj_reg); -+ __ mv(c_rarg1, lock_reg); -+ __ mv(c_rarg2, xthread); ++// Declaration and definition of StubGenerator (no .hpp file). 
++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp + -+ // Not a leaf but we have last_Java_frame setup as we want -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); -+ restore_args(masm, total_c_args, c_arg, out_regs); ++#undef __ ++#define __ _masm-> + -+#ifdef ASSERT -+ { Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit from monitorenter"); -+ __ bind(L); -+ } ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) +#endif -+ __ j(lock_done); + -+ __ block_comment("} Slow path lock"); ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + -+ __ block_comment("Slow path unlock {"); -+ __ bind(slow_path_unlock); ++// Stub Code definitions + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ save_native_result(masm, ret_type, stack_slots); -+ } ++class StubGenerator: public StubCodeGenerator { ++ private: + -+ __ mv(c_rarg2, xthread); -+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ mv(c_rarg0, obj_reg); ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); ++ } ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif + -+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) -+ // NOTE that obj_reg == x9 currently -+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? ++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. ++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... 
++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) + -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -33, + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); -+ __ bind(L); -+ } -+#endif /* ASSERT */ ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + -+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { -+ restore_native_result(masm, ret_type, stack_slots); -+ } -+ __ j(unlock_done); ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; + -+ __ block_comment("} Slow path unlock"); ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); + -+ } // synchronized ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); + -+ // SLOW PATH Reguard the stack if needed ++ const Address sp_after_call (fp, sp_after_call_off * wordSize); + -+ __ bind(reguard); -+ save_native_result(masm, ret_type, stack_slots); -+ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ restore_native_result(masm, ret_type, stack_slots); -+ // and continue -+ __ j(reguard_done); ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); + -+ // SLOW PATH safepoint -+ { -+ __ block_comment("safepoint {"); -+ __ bind(safepoint_in_progress); ++ const Address thread (fp, 
thread_off * wordSize); + -+ // Don't use call_VM as it will see a possible pending exception and forward it -+ // and never return here preventing us from clearing _last_native_pc down below. -+ // -+ save_native_result(masm, ret_type, stack_slots); -+ __ mv(c_rarg0, xthread); -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); -+ __ jalr(x1, t0, offset); ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); + -+ // Restore any method result value -+ restore_native_result(masm, ret_type, stack_slots); ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); + -+ __ j(safepoint_in_progress_done); -+ __ block_comment("} safepoint"); -+ } ++ const Address x9_save (fp, x9_off * wordSize); + -+ // SLOW PATH dtrace support -+ { -+ __ block_comment("dtrace entry {"); -+ __ bind(dtrace_method_entry); ++ // stub code + -+ // We have all of the arguments setup at this point. We must not touch any register -+ // argument registers at this point (what if we save/restore them there are no oop? ++ address riscv_entry = __ pc(); + -+ save_args(masm, total_c_args, c_arg, out_regs); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), -+ xthread, c_rarg1); -+ restore_args(masm, total_c_args, c_arg, out_regs); -+ __ j(dtrace_method_entry_done); -+ __ block_comment("} dtrace entry"); -+ } -+ -+ { -+ __ block_comment("dtrace exit {"); -+ __ bind(dtrace_method_exit); -+ save_native_result(masm, ret_type, stack_slots); -+ __ mov_metadata(c_rarg1, method()); -+ __ call_VM_leaf( -+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), -+ xthread, c_rarg1); -+ restore_native_result(masm, ret_type, stack_slots); -+ __ j(dtrace_method_exit_done); -+ __ block_comment("} dtrace exit"); -+ } ++ // set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); + -+ __ flush(); ++ // save register parameters and Java temporary/global registers ++ // n.b. 
we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); + -+ nmethod *nm = nmethod::new_native_nmethod(method, -+ compile_id, -+ masm->code(), -+ vep_offset, -+ frame_complete, -+ stack_slots / VMRegImpl::slots_per_word, -+ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), -+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), -+ oop_maps); -+ assert(nm != NULL, "create native nmethod fail!"); -+ return nm; -+} ++ __ sd(x9, x9_save); + -+// this function returns the adjust size (in number of words) to a c2i adapter -+// activation for use during deoptimization -+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { -+ assert(callee_locals >= callee_parameters, -+ "test and remove; got more parms than locals"); -+ if (callee_locals < callee_parameters) { -+ return 0; // No adjustment for negative locals -+ } -+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; -+ // diff is counted in stack words -+ return align_up(diff, 2); -+} ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); + -+//------------------------------generate_deopt_blob---------------------------- -+void SharedRuntime::generate_deopt_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ int pad = 0; -+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ int frame_size_in_words = -1; -+ OopMap* map = NULL; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ __ fsd(f8, f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); + -+ // ------------- -+ // This code enters when returning to a de-optimized nmethod. A return -+ // address has been pushed on the the stack, and return values are in -+ // registers. -+ // If we are doing a normal deopt then we were called from the patched -+ // nmethod from the point we returned to the nmethod. So the return -+ // address on the stack is wrong by NativeCall::instruction_size -+ // We will adjust the value so it looks like we have the original return -+ // address on the stack (like when we eagerly deoptimized). -+ // In the case of an exception pending when deoptimizing, we enter -+ // with a return address on the stack that points after the call we patched -+ // into the exception handler. We have the following register state from, -+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). -+ // x10: exception oop -+ // x9: exception handler -+ // x13: throwing pc -+ // So in this case we simply jam x13 into the useless return address and -+ // the stack looks just like we want. -+ // -+ // At this point we need to de-opt. We save the argument return -+ // registers. 
We call the first C routine, fetch_unroll_info(). This -+ // routine captures the return values and returns a structure which -+ // describes the current frame size and the sizes of all replacement frames. -+ // The current frame is compiled code and may contain many inlined -+ // functions, each with their own JVM state. We pop the current frame, then -+ // push all the new frames. Then we call the C routine unpack_frames() to -+ // populate these frames. Finally unpack_frames() returns us the new target -+ // address. Notice that callee-save registers are BLOWN here; they have -+ // already been captured in the vframeArray at the time the return PC was -+ // patched. -+ address start = __ pc(); -+ Label cont; ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); + -+ // Prolog for non exception case! ++ // And method ++ __ mv(xmethod, c_rarg3); + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // set up the heapbase register ++ __ reinit_heapbase(); + -+ // Normal deoptimization. Save exec mode for unpack_frames. -+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved -+ __ j(cont); ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); ++ } ++#endif ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); + -+ int reexecute_offset = __ pc() - start; ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); + -+ // Reexecute case -+ // return address is the pc describes what bci to do re-execute at ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); + -+ // No need to update map as each call to save_live_registers will produce identical oopmap -+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ BIND(parameters_done); + -+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved -+ __ j(cont); ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); + -+ int exception_offset = __ pc() - start; ++ // save current address for use by exception handling code + -+ // Prolog for exception case ++ return_address = __ pc(); + -+ // all registers are dead at this entry point, except for x10, and -+ // x13 which contain the exception oop and exception pc -+ // respectively. Set them in TLS and fall thru to the -+ // unpack_with_exception_in_tls entry point. ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. 
this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ mv(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ mv(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ mv(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ mv(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); + -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); + -+ int exception_in_tls_offset = __ pc() - start; ++ __ BIND(exit); + -+ // new implementation because exception oop is now passed in JavaThread ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); + -+ // Prolog for exception case -+ // All registers must be preserved because they might be used by LinearScan -+ // Exceptiop oop and throwing PC are passed in JavaThread -+ // tos: stack at point of call to method that threw the exception (i.e. only -+ // args are on the stack, no return address) ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); ++ } ++#endif + -+ // The return address pushed by save_live_registers will be patched -+ // later with the throwing pc. The correct value is not available -+ // now because loading it from memory would destroy registers. ++ // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); + -+ // NB: The SP at this point must be the SP of the method that is -+ // being deoptimized. Deoptimization assumes that the frame created -+ // here by save_live_registers is immediately below the method's SP. -+ // This is a somewhat fragile mechanism. ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); + -+ // Save everything in sight. -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ ld(x9, x9_save); + -+ // Now it is safe to overwrite any register ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); + -+ // Deopt during an exception. Save exec mode for unpack_frames. -+ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ // leave frame and return to caller ++ __ leave(); ++ __ ret(); + -+ // load throwing pc from JavaThread and patch it as the return address -+ // of the current frame. 
Then clear the field in JavaThread ++ // handle return types different from T_INT + -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); + -+#ifdef ASSERT -+ // verify that there is really an exception oop in JavaThread -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ verify_oop(x10); ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); + -+ // verify that there is no pending exception -+ Label no_pending_exception; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, no_pending_exception); -+ __ stop("must not have pending exception here"); -+ __ bind(no_pending_exception); -+#endif ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); + -+ __ bind(cont); ++ return start; ++ } + -+ // Call C code. Need thread and this frame, but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. + // -+ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // x10: exception oop + -+ // fetch_unroll_info needs to call last_java_frame(). ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); +#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(xthread, -+ JavaThread::last_Java_fp_offset())); -+ __ beqz(t0, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); -+ } -+#endif // ASSERT -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif + -+ // Need to have an oopmap that tells fetch_unroll_info where to -+ // find any register it might need. 
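After the Java call returns, the call stub above stores the raw result through the caller-supplied result pointer, switching on the result type: T_OBJECT and T_LONG take a 64-bit store, T_FLOAT and T_DOUBLE take FP stores, and everything else is treated as T_INT. A stand-alone C++ sketch of that dispatch, with an illustrative Tag enum and RawResult struct standing in for BasicType and the x10/j_farg0 registers:

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Illustrative stand-ins: Tag for BasicType, RawResult for the integer and
// floating-point return registers after the Java call.
enum class Tag { Int, Long, Object, Float, Double };

struct RawResult {
    int64_t gpr;   // integer / reference result (x10)
    double  fpr;   // floating-point result (j_farg0)
};

static void store_result(Tag tag, const RawResult& r, void* result_slot) {
    switch (tag) {
    case Tag::Object:                                        // handled like T_LONG
    case Tag::Long:
        std::memcpy(result_slot, &r.gpr, sizeof(int64_t));   // sd x10
        break;
    case Tag::Float: {
        float f = (float)r.fpr;
        std::memcpy(result_slot, &f, sizeof(float));         // fsw j_farg0
        break;
    }
    case Tag::Double:
        std::memcpy(result_slot, &r.fpr, sizeof(double));    // fsd j_farg0
        break;
    default: {                                               // everything else: T_INT
        int32_t i = (int32_t)r.gpr;
        std::memcpy(result_slot, &i, sizeof(int32_t));       // sw x10
        break;
    }
    }
}

int main() {
    int32_t slot = 0;
    RawResult r = { -7, 0.0 };
    store_result(Tag::Int, r, &slot);
    std::printf("int result: %d\n", slot);
    return 0;
}
```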
-+ oop_maps->add_gc_map(__ pc() - start, map); ++ // set pending exception ++ __ verify_oop(x10); + -+ __ reset_last_Java_frame(false); ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ // Load UnrollBlock* into x15 -+ __ mv(x15, x10); ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); + -+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ Label noException; -+ __ li(t0, Deoptimization::Unpack_exception); -+ __ bne(xcpool, t0, noException); // Was exception pending? -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ return start; ++ } + -+ __ verify_oop(x10); ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! + -+ // Overwrite the result registers with the exception results. -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog + -+ __ bind(noException); ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); + -+ // Only register save data is on the stack. -+ // Now restore the result registers. Everything else is either dead -+ // or captured in the vframeArray. ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. + -+ // Restore fp result register -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif + -+ // Pop all of the register save area off the stack -+ __ add(sp, sp, frame_size_in_words * wordSize); ++ // compute exception handler into x9 + -+ // All of the register save area has been popped of the stack. Only the -+ // return address remains. ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. 
the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. ++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); + -+ // Pop all the frames we must move/replace. ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops + // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message + // -+ // Note: by leaving the return address of self-frame on the stack -+ // and using the size of frame 2 to adjust the stack -+ // when we are done the return to frame 3 will still be on the stack. ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { + -+ // Pop deoptimized frame -+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, Address(sp, 0)); -+ __ ld(ra, Address(sp, wordSize)); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. 
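generate_forward_exception() above turns "a pending exception is stored in the thread" into the Java-level contract x10 = exception oop, x13 = throwing pc, jump to handler, using the return address in ra as the throwing pc. A rough stand-alone model of that handoff; MockThread and handler_for_return_address are fakes standing in for the JavaThread field and the SharedRuntime lookup:

```cpp
#include <cstdint>
#include <cstdio>

struct MockThread {
    void* pending_exception = nullptr;   // Thread::pending_exception_offset()
};

struct Dispatch {
    void*     exception;    // x10
    uintptr_t throwing_pc;  // x13
    uintptr_t handler;      // x9, target of the final jr
};

// Stand-in for SharedRuntime::exception_handler_for_return_address.
static uintptr_t handler_for_return_address(uintptr_t ret_addr) {
    return ret_addr + 0x40;   // fake answer; the real code asks the VM
}

static Dispatch forward_exception(MockThread* t, uintptr_t ra) {
    Dispatch d;
    d.handler     = handler_for_return_address(ra); // VM call, ra passed as the argument
    d.throwing_pc = ra;                             // the return address becomes the throwing pc
    d.exception   = t->pending_exception;           // load the pending exception ...
    t->pending_exception = nullptr;                 // ... and clear the thread-local slot
    return d;
}

int main() {
    MockThread t;
    int ex = 1;
    t.pending_exception = &ex;
    Dispatch d = forward_exception(&t, 0x2000);
    std::printf("exception=%p pc=0x%lx handler=0x%lx\n",
                d.exception, (unsigned long)d.throwing_pc, (unsigned long)d.handler);
    return 0;
}
```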
-+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+#endif -+ // Load address of array of frame pcs into x12 -+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ Label exit, error; + -+ // Load address of array of frame sizes into x14 -+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 + -+ // Load counter into x13 -+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); + -+ // Now adjust the caller's stack to make up for the extra locals -+ // but record the original sp so that we can save it in the skeletal interpreter -+ // frame and the stack walking of interpreter_sender will get the unextended sp -+ // value and not the "real" sp value. ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK + -+ const Register sender_sp = x16; ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ __ mv(sender_sp, sp); -+ __ lwu(x9, Address(x15, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); -+ __ sub(sp, sp, x9); ++ // Compare c_rarg2 and c_rarg3. ++ __ bne(c_rarg2, c_rarg3, error); + -+ // Push interpreter frames in a loop -+ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern -+ __ mv(t1, t0); -+ Label loop; -+ __ bind(loop); -+ __ ld(x9, Address(x14, 0)); // Load frame size -+ __ addi(x14, x14, wordSize); -+ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Load pc -+ __ addi(x12, x12, wordSize); -+ __ enter(); // Save old & set new fp -+ __ sub(sp, sp, x9); // Prolog -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ addi(x13, x13, -1); // Decrement counter -+ __ bnez(x13, loop); ++ // make sure klass is 'reasonable', which is not zero. ++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken + -+ // Re-push self-frame -+ __ ld(ra, Address(x12)); -+ __ enter(); ++ // return if everything seems ok ++ __ bind(exit); + -+ // Allocate a full sized register save area. We subtract 2 because -+ // enter() just pushed 2 words -+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); + -+ // Restore frame locals after moving the frame -+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ // handle errors ++ __ bind(error); ++ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. 
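The plausibility checks in generate_verify_oop() boil down to: NULL passes, otherwise the pointer must match the pattern given by Universe::verify_oop_mask()/verify_oop_bits() and its klass word must be non-NULL. A stand-alone sketch with a deliberately weak stand-in pattern (only the 8-byte alignment bits are checked here):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative mask/bits; HotSpot gets the real pattern from
// Universe::verify_oop_mask() and Universe::verify_oop_bits().
static const uintptr_t kOopMask = 0x7;   // oops are at least 8-byte aligned
static const uintptr_t kOopBits = 0x0;

struct FakeObject {
    uintptr_t klass;    // stands in for the klass word read via load_klass
    int       payload;
};

static bool oop_looks_reasonable(const FakeObject* obj) {
    if (obj == nullptr) {
        return true;                                // "if obj is NULL it is OK"
    }
    if (((uintptr_t)obj & kOopMask) != kOopBits) {  // address pattern check
        return false;
    }
    return obj->klass != 0;                         // "if klass is NULL it is broken"
}

int main() {
    FakeObject obj = { 0x1234, 7 };
    std::printf("null: %d  obj: %d\n",
                (int)oop_looks_reasonable(nullptr),
                (int)oop_looks_reasonable(&obj));
    return 0;
}
```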
++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ __ ebreak(); ++ ++ return start; ++ } ++ ++ // The inner part of zero_words(). + // -+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. + -+ // Use fp because the frames look interpreted now -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ address generate_zero_blocks() { ++ Label done; + -+ __ mv(c_rarg0, xthread); -+ __ mv(c_rarg1, xcpool); // second arg: exec_mode -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ const Register base = x28, cnt = x29; + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, -+ new OopMap(frame_size_in_words, 0)); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ { ++ // Clear the remaining blocks. ++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); ++ } + -+ // Collect return values -+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); -+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ __ ret(); + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ return start; ++ } + -+ // Jump to interpreter -+ __ ret(); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ // Make sure all code is generated -+ masm->flush(); ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. 
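generate_zero_blocks() above clears memory one block of words at a time and hands the short tail (fewer than one block) back to the caller in adjusted base/count registers. The same strategy in stand-alone C++, with a block size of 8 assumed in place of MacroAssembler::zero_words_block_size:

```cpp
#include <cstdint>
#include <cstdio>

static const int kBlockSize = 8;   // assumed stand-in for zero_words_block_size

struct Tail { uint64_t* base; long count; };

static Tail zero_blocks(uint64_t* base, long count) {
    while (count >= kBlockSize) {              // clear whole blocks only
        for (int i = 0; i < kBlockSize; i++) {
            *base++ = 0;                       // sd zr, 0(base); add base, base, 8
        }
        count -= kBlockSize;
    }
    return Tail{ base, count };                // fewer than kBlockSize words remain
}

int main() {
    uint64_t buf[21];
    for (auto& w : buf) w = 0xdeadbeef;
    Tail t = zero_blocks(buf, 21);
    std::printf("tail words left to clear: %ld (buf[15]=%llu)\n",
                t.count, (unsigned long long)buf[15]);
    return 0;
}
```

As in the stub, the caller is left to clear the remaining count < block_size words itself.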
++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); -+ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); -+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -+} ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; + -+// Number of stack slots between incoming argument block and the start of -+// a new frame. The PROLOG must add this many slots to the stack. The -+// EPILOG must remove this many slots. -+// RISCV needs two words for RA (return address) and FP (frame pointer). -+uint SharedRuntime::in_preserve_stack_slots() { -+ return 2 * VMRegImpl::slots_per_word; -+} ++ const Register stride = x30; + -+uint SharedRuntime::out_preserve_stack_slots() { -+ return 0; -+} ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); + -+#ifdef COMPILER2 -+//------------------------------generate_uncommon_trap_blob-------------------- -+void SharedRuntime::generate_uncommon_trap_blob() { -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } + -+ address start = __ pc(); ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ // Push self-frame. We get here with a return address in RA -+ // and sp should be 16 byte aligned -+ // push fp and retaddr by hand -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp, 0)); -+ // we don't expect an arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++ __ mv(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } +#endif -+ // compiler left unloaded_class_index in j_rarg0 move to where the -+ // runtime expects it. -+ __ addiw(c_rarg1, j_rarg0, 0); + -+ // we need to set the past SP to the stack pointer of the stub frame -+ // and the pc to the address where this runtime call will return -+ // although actually any pc in this code blob will do). 
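The (removed) in_preserve_stack_slots() above reserves two words, RA and FP, expressed in VMReg stack slots. As a quick check of the arithmetic under the usual 64-bit sizes (8-byte words and 4-byte slots, both assumed here), that comes out to four slots, i.e. 16 bytes per frame:

```cpp
#include <cstdio>

// Assumed sizes for a 64-bit target: an 8-byte machine word and a 4-byte
// VMReg stack slot, so slots_per_word == 2.
static const int kWordSize     = 8;
static const int kSlotSize     = 4;
static const int kSlotsPerWord = kWordSize / kSlotSize;

// Mirrors in_preserve_stack_slots(): two words, RA and FP.
static int in_preserve_stack_slots() { return 2 * kSlotsPerWord; }

int main() {
    int slots = in_preserve_stack_slots();
    std::printf("%d slots = %d bytes reserved for RA and FP\n",
                slots, slots * kSlotSize);   // 4 slots = 16 bytes
    return 0;
}
```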
-+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // capture callee-saved registers as well as return values. -+ // -+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) -+ // -+ // n.b. 3 gp args, 0 fp args, integral return type ++ __ sub(count, count, 16); ++ __ bltz(count, drain); + -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ int32_t offset = 0; -+ __ la_patchable(t0, -+ RuntimeAddress(CAST_FROM_FN_PTR(address, -+ Deoptimization::uncommon_trap)), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ __ bind(again); + -+ // Set an oopmap for the call site -+ OopMapSet* oop_maps = new OopMapSet(); -+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); -+ assert_cond(oop_maps != NULL && map != NULL); ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); + -+ // location of fp is known implicitly by the frame sender code ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); + -+ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); + -+ __ reset_last_Java_frame(false); ++ __ sub(count, count, 8); ++ __ bgez(count, again); + -+ // move UnrollBlock* into x14 -+ __ mv(x14, x10); ++ // Drain ++ __ bind(drain); + -+#ifdef ASSERT -+ { Label L; -+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); -+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); -+ __ beq(t0, t1, L); -+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); -+ __ bind(L); -+ } -+#endif ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ // Pop all the frames we must move/replace. -+ // -+ // Frame picture (youngest to oldest) -+ // 1: self-frame (no frame link) -+ // 2: deopting frame (no frame link) -+ // 3: caller of deopting frame (could be compiled/interpreted). ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); + -+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! 
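The main loop of generate_copy_longs() keeps one batch of eight words "in flight" in registers: each iteration stores the previous batch and loads the next, and a final drain step stores the last batch; the 4-, 2- and 1-word tail that follows is handled separately. A scalar, forward-only C++ model of that pipelined loop:

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Precondition, as in the stub: count >= 8 words.
static long copy_longs_forward(const uint64_t* s, uint64_t* d, long count) {
    uint64_t batch[8];
    std::memcpy(batch, s, sizeof(batch));     // prologue: load the first 8 words
    s += 8;
    count -= 16;                              // bias so both loop exits are sign tests
    while (count >= 0) {                      // "again": store previous batch, load next
        std::memcpy(d, batch, sizeof(batch));
        std::memcpy(batch, s, sizeof(batch));
        s += 8; d += 8;
        count -= 8;
    }
    std::memcpy(d, batch, sizeof(batch));     // "drain": store the batch still in flight
    return count + 8;                         // 0..7 words remain for the tail code
}

int main() {
    uint64_t src[20], dst[20] = {0};
    for (int i = 0; i < 20; i++) src[i] = i;
    long tail = copy_longs_forward(src, dst, 20);
    std::printf("tail=%ld dst[15]=%llu\n", tail, (unsigned long long)dst[15]);
    return 0;
}
```

The bias of 16 means the (possibly negative) count left after the drain still has the remaining word count in its low bits, which is what the postcondition above relies on; the sketch simply undoes the bias and returns that remainder.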
++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); + -+ // Pop deoptimized frame (int) -+ __ lwu(x12, Address(x14, -+ Deoptimization::UnrollBlock:: -+ size_of_deoptimized_frame_offset_in_bytes())); -+ __ sub(x12, x12, 2 * wordSize); -+ __ add(sp, sp, x12); -+ __ ld(fp, sp, 0); -+ __ ld(ra, sp, wordSize); -+ __ addi(sp, sp, 2 * wordSize); -+ // RA should now be the return address to the caller (3) frame ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); + -+#ifdef ASSERT -+ // Compilers generate code that bang the stack by as much as the -+ // interpreter would need. So this stack banging should never -+ // trigger a fault. Verify that it does not on non product builds. -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+#endif ++ __ bind(L1); + -+ // Load address of array of frame pcs into x12 (address*) -+ __ ld(x12, Address(x14, -+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } + -+ // Load address of array of frame sizes into x15 (intptr_t*) -+ __ ld(x15, Address(x14, -+ Deoptimization::UnrollBlock:: -+ frame_sizes_offset_in_bytes())); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } + -+ // Counter -+ __ lwu(x13, Address(x14, -+ Deoptimization::UnrollBlock:: -+ number_of_frames_offset_in_bytes())); // (int) ++ __ ret(); ++ } + -+ // Now adjust the caller's stack to make up for the extra locals but -+ // record the original sp so that we can save it in the skeletal -+ // interpreter frame and the stack walking of interpreter_sender -+ // will get the unextended sp value and not the "real" sp value. ++ Label copy_f, copy_b; + -+ const Register sender_sp = t1; // temporary register ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ + -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ caller_adjustment_offset_in_bytes())); // (int) -+ __ mv(sender_sp, sp); -+ __ sub(sp, sp, x11); ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); + -+ // Push interpreter frames in a loop -+ Label loop; -+ __ bind(loop); -+ __ ld(x11, Address(x15, 0)); // Load frame size -+ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand -+ __ ld(ra, Address(x12, 0)); // Save return address -+ __ enter(); // and old fp & set new fp -+ __ sub(sp, sp, x11); // Prolog -+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable -+ // This value is corrected by layout_activation_impl -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ mv(sender_sp, sp); // Pass sender_sp to next frame -+ __ add(x15, x15, wordSize); // Bump array pointer (sizes) -+ __ add(x12, x12, wordSize); // Bump array pointer (pcs) -+ __ subw(x13, x13, 1); // Decrement counter -+ __ bgtz(x13, loop); -+ __ ld(ra, Address(x12, 0)); // save final return address -+ // Re-push self-frame -+ __ enter(); // & old fp & set new fp ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); + -+ // Use fp because the frames look interpreted now -+ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. -+ // Don't need the precise return PC here, just precise enough to point into this code blob. -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; + -+ // Call C code. Need thread but NOT official VM entry -+ // crud. We cannot block on this call, no GC can happen. Call should -+ // restore return values to their stack-slots with the new SP. -+ // -+ // BasicType unpack_frames(JavaThread* thread, int exec_mode) -+ // ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); + -+ // n.b. 
2 gp args, 0 fp args, integral return type ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } + -+ // sp should already be aligned -+ __ mv(c_rarg0, xthread); -+ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); -+ offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); -+ __ jalr(x1, t0, offset); ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); + -+ // Set an oopmap for the call site -+ // Use the same PC we used for the last java frame -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); + -+ // Clear fp AND pc -+ __ reset_last_Java_frame(true); ++ if (is_backward) { ++ __ j(done); + -+ // Pop self-frame. -+ __ leave(); // Epilog ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } + -+ // Jump to interpreter -+ __ ret(); ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } + -+ // Make sure all code is generated -+ masm->flush(); ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); + -+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, -+ SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -+ -+//------------------------------generate_handler_blob------ -+// -+// Generate a special Compile2Runtime blob that saves all registers, -+// and setup oopmap. -+// -+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { -+ ResourceMark rm; -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; + -+ // Allocate space for the code. Setup code generation tools. -+ CodeBuffer buffer("handler_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ Label same_aligned; ++ Label copy8, copy_small, done; + -+ address start = __ pc(); -+ address call_pc = NULL; -+ int frame_size_in_words = -1; -+ bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (granularity) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case 2 : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case 4 : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case 8 : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); ++ } + -+ // Save Integer and Float registers. 
-+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); ++ } + -+ // The following is basically a call_VM. However, we need the precise -+ // address of the call in order to generate an oopmap. Hence, we do all the -+ // work outselves. ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); ++ } + -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ // The return address must always be correct so that frame constructor never -+ // sees an invalid pc. ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ if (!cause_return) { -+ // overwrite the return address pushed by save_live_registers -+ // Additionally, x18 is a callee-saved register so we can look at -+ // it later to determine if someone changed the return address for -+ // us! -+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ // Do the call -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ // Set an oopmap for the call site. This oopmap will map all -+ // oop-registers and debug-info registers as callee-saved. This -+ // will allow deoptimization at this safepoint to find all possible -+ // debug-info recordings, as well as let GC find all oops. ++ __ beqz(cnt, done); + -+ oop_maps->add_gc_map( __ pc() - start, map); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + -+ Label noException; ++ __ bind(done); ++ } + -+ __ reset_last_Java_frame(false); ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. 
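copy_memory() above implements the strategy laid out in the pseudocode: small copies, and copies whose source and destination are not congruent modulo 8, go element by element; otherwise a few leading elements are copied until the source is 8-byte aligned and the bulk is moved a word at a time, with a short tail at the end. A simplified, forward-only, byte-element C++ rendering of that decision tree (the stub additionally supports backward copies and 2-, 4- and 8-byte elements):

```cpp
#include <cstdint>
#include <cstddef>
#include <cstring>
#include <cstdio>

static void copy_memory_forward(const uint8_t* src, uint8_t* dst, size_t count) {
    const size_t kSmallLimit = 16;   // "count limit maybe greater than 16"
    if (count >= kSmallLimit && (((uintptr_t)src ^ (uintptr_t)dst) & 7) == 0) {
        // Align the source (and, by congruence, the destination) to 8 bytes.
        while (((uintptr_t)src & 7) != 0 && count > 0) {
            *dst++ = *src++;
            count--;
        }
        while (count >= 8) {                       // the "copy8" loop
            uint64_t w;
            std::memcpy(&w, src, 8);
            std::memcpy(dst, &w, 8);
            src += 8; dst += 8; count -= 8;
        }
    }
    while (count > 0) {                            // "copy_small" tail
        *dst++ = *src++;
        count--;
    }
}

int main() {
    uint8_t src[40], dst[40] = {0};
    for (int i = 0; i < 40; i++) src[i] = (uint8_t)i;
    copy_memory_forward(src + 1, dst + 1, 37);     // deliberately misaligned start
    std::printf("dst[20]=%d\n", dst[20]);
    return 0;
}
```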
++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); + -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ add(temp, a, t1); ++ if (size == (size_t)wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); ++ } + -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, noException); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_reg = RegSet::of(s, d, count); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); + -+ // Exception pending ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ reg_saver.restore_live_registers(masm); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); + -+ // No exception case -+ __ bind(noException); ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ Label no_adjust, bail; -+ if (!cause_return) { -+ // If our stashed return pc was modified by the runtime we avoid touching it -+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); -+ __ bne(x18, t0, no_adjust); ++ copy_memory(aligned, s, d, count, t0, size); + -+#ifdef ASSERT -+ // Verify the correct encoding of the poll we're about to skip. 
-+ // See NativeInstruction::is_lwu_to_zr() -+ __ lwu(t0, Address(x18)); -+ __ andi(t1, t0, 0b0000011); -+ __ mv(t2, 0b0000011); -+ __ bne(t1, t2, bail); // 0-6:0b0000011 -+ __ srli(t1, t0, 7); -+ __ andi(t1, t1, 0b00000); -+ __ bnez(t1, bail); // 7-11:0b00000 -+ __ srli(t1, t0, 12); -+ __ andi(t1, t1, 0b110); -+ __ mv(t2, 0b110); -+ __ bne(t1, t2, bail); // 12-14:0b110 -+#endif -+ // Adjust return pc forward to step over the safepoint poll instruction -+ __ add(x18, x18, NativeInstruction::instruction_size); -+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); -+ } ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } + -+ __ bind(no_adjust); -+ // Normal exit, restore registers and exit. ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + -+ reg_saver.restore_live_registers(masm); -+ __ ret(); ++ __ leave(); ++ __ mv(x10, zr); // return 0 ++ __ ret(); ++ return start; ++ } + -+#ifdef ASSERT -+ __ bind(bail); -+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -+#endif ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); + -+ // Make sure all code is generated -+ masm->flush(); ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } + -+ // Fill-out other meta info -+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -+} ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); + -+// -+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -+// -+// Generate a stub that calls into vm to find out the proper destination -+// of a java call. All the argument registers are live at this point -+// but since this is generic code we don't know what they are and the caller -+// must do any gc of the args. 
-+// -+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } + -+ // allocate space for the code -+ ResourceMark rm; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); + -+ CodeBuffer buffer(name, 1000, 512); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } + -+ int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ copy_memory(aligned, s, d, count, t0, -size); + -+ OopMapSet *oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); -+ OopMap* map = NULL; ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ __ leave(); ++ __ mv(x10, zr); // return 0 ++ __ ret(); ++ return start; ++ } + -+ int start = __ offset(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } + -+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ int frame_complete = __ offset(); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } + -+ { -+ Label retaddr; -+ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(destination), offset); -+ __ jalr(x1, t0, offset); -+ __ bind(retaddr); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); + } + -+ // Set an oopmap for the call site. -+ // We need this not only for callee-saved registers, but also for volatile -+ // registers that the compiler might be keeping live across a safepoint. 
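// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the conjoint copy helper generated
// earlier (generate_conjoint_copy) falls back to the forward, no-overlap path
// with a single unsigned comparison -- "(d - s) >= count << log2(element
// size)".  The C++ below restates that predicate under the same assumptions
// the stub makes (pointers treated as integers, unsigned wrap-around covering
// the dst-below-src case).  The function and parameter names are the editor's
// own, not HotSpot API.
#include <cstddef>
#include <cstdint>

// True when a low-to-high copy can never overwrite source bytes that have not
// been read yet, i.e. when the disjoint ("nooverlap") stub may be used.
static bool forward_copy_is_safe(const void* src, void* dst,
                                 size_t element_count, unsigned log2_size) {
  uintptr_t s = reinterpret_cast<uintptr_t>(src);
  uintptr_t d = reinterpret_cast<uintptr_t>(dst);
  // If dst is below src the unsigned difference wraps to a huge value, which
  // is always >= the byte count -- the same effect as the stub's
  // bgeu(t0, t1, nooverlap_target) after sub(t0, d, s).
  return (d - s) >= (static_cast<uintptr_t>(element_count) << log2_size);
}
// --------------------------- end editor's sketch ---------------------------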
++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ oop_maps->add_gc_map( __ offset() - start, map); + -+ // x10 contains the address we are going to jump to assuming no exception got installed ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } + -+ // clear last_Java_sp -+ __ reset_last_Java_frame(false); -+ // check for pending exceptions -+ Label pending; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, pending); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); ++ } + -+ // get the returned Method* -+ __ get_vm_result_2(xmethod, xthread); -+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? 
sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } + -+ // x10 is where we want to jump, overwrite t0 which is saved and temporary -+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); -+ reg_saver.restore_live_registers(masm); ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } + -+ // We are back the the original state on entry and ready to go. ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); + -+ __ jr(t0); ++ BLOCK_COMMENT("type_check:"); + -+ // Pending exception after the safepoint ++ Label L_miss; + -+ __ bind(pending); ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + -+ reg_saver.restore_live_registers(masm); ++ // Fall through on failure! ++ __ BIND(L_miss); ++ } + -+ // exception pending => remove activation and forward to exception handler ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + -+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass + -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); + -+ // ------------- -+ // make sure all code is generated -+ masm->flush(); ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass + -+ // return the blob -+ return RuntimeStub::new_runtime_stub(name, &buffer, 
frame_complete, frame_size_in_words, oop_maps, true); -+} ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. + -+#ifdef COMPILER2 -+RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -+ int shadow_space_bytes, -+ const GrowableArray& input_registers, -+ const GrowableArray& output_registers) { -+ Unimplemented(); -+ return nullptr; -+} ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); + -+//------------------------------generate_exception_blob--------------------------- -+// creates exception blob at the end -+// Using exception blob, this code is jumped from a compiled method. -+// (see emit_exception_handler in riscv.ad file) -+// -+// Given an exception pc at a call we call into the runtime for the -+// handler in this method. This handler might merely restore state -+// (i.e. callee save registers) unwind the frame and jump to the -+// exception handler for the nmethod if there is no Java level handler -+// for the nmethod. -+// -+// This code is entered with a jmp. -+// -+// Arguments: -+// x10: exception oop -+// x13: exception pc -+// -+// Results: -+// x10: exception oop -+// x13: exception pc in caller -+// destination: exception handler of caller -+// -+// Note: the exception pc MUST be at a call (precise debug information) -+// Registers x10, x13, x12, x14, x15, t0 are not callee saved. -+// ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); + -+void OptoRuntime::generate_exception_blob() { -+ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); -+ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); ++ } + -+ // Allocate space for the code -+ ResourceMark rm; -+ // Setup code generation tools -+ CodeBuffer buffer("exception_blob", 2048, 1024); -+ MacroAssembler* masm = new MacroAssembler(&buffer); -+ assert_cond(masm != NULL); ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); + -+ // TODO check various assumptions made here -+ // -+ // make sure we do so before running this ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); + -+ address start = __ pc(); ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. 
++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT + -+ // push fp and retaddr by hand -+ // Exception pc is 'return address' for stack walker -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, wordSize)); -+ __ sd(fp, Address(sp)); -+ // there are no callee save registers and we don't expect an -+ // arg reg save area -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ // Store exception in Thread object. We cannot pass any arguments to the -+ // handle_exception call, since we do not want to make any assumption -+ // about the size of the frame where the exception happened in. -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } + -+ // This call does all the hard work. It checks if an exception handler -+ // exists in the method. -+ // If so, it returns the handler address. -+ // If not, it prepares for stack-unwinding, restoring the callee-save -+ // registers of the frame being removed. -+ // -+ // address OptoRuntime::handle_exception_C(JavaThread* thread) -+ // -+ // n.b. 1 gp arg, 0 fp args, integral return type ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); + -+ // the stack should always be aligned -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, noreg, the_pc, t0); -+ __ mv(c_rarg0, xthread); -+ int32_t offset = 0; -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); -+ __ jalr(x1, t0, offset); ++ // save the original count ++ __ mv(count_save, count); + ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); + -+ // handle_exception_C is a special VM call which does not require an explicit -+ // instruction sync afterwards. ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... ++ // store_heap_oop(to++, copied_oop) ++ // end + -+ // Set an oopmap for the call site. This oopmap will only be used if we -+ // are unwinding the stack. Hence, all locations will be dead. -+ // Callee-saved registers will be the same as the frame above (i.e., -+ // handle_exception_stub), since they were restored when we got the -+ // exception. ++ __ align(OptoLoopAlignment); + -+ OopMapSet* oop_maps = new OopMapSet(); -+ assert_cond(oop_maps != NULL); ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); + -+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 
4 : 8); ++ __ beqz(copied_oop, L_store_element); + -+ __ reset_last_Java_frame(false); ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== + -+ // Restore callee-saved registers ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. + -+ // fp is an implicitly saved callee saved register (i.e. the calling -+ // convention will save restore it in prolog/epilog) Other than that -+ // there are no callee save registers now that adapter frames are gone. -+ // and we dont' expect an arg reg save area -+ __ ld(fp, Address(sp)); -+ __ ld(x13, Address(sp, wordSize)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); + -+ // x10: exception handler ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); + -+ // We have a handler in x10 (could be deopt blob). -+ __ mv(t0, x10); ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + -+ // Get the exception oop -+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ // Get the exception pc in case we are deoptimized -+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); -+#ifdef ASSERT -+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); -+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); -+#endif -+ // Clear the exception oop so GC no longer processes it as a root. -+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); ++ __ ret(); + -+ // x10: exception oop -+ // t0: exception handler -+ // x14: exception pc -+ // Jump to handler ++ return start; ++ } + -+ __ jr(t0); ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); + -+ // Make sure all code is generated -+ masm->flush(); ++ assert_different_registers(t0, temp); + -+ // Set exception blob -+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); -+} -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -new file mode 100644 -index 00000000000..b3fdd04db1b ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3864 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "compiler/oopMap.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/interpreter.hpp" -+#include "memory/universe.hpp" -+#include "nativeInst_riscv.hpp" -+#include "oops/instanceOop.hpp" -+#include "oops/method.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/handles.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubCodeGenerator.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/align.hpp" -+#include "utilities/powerOfTwo.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif -+#if INCLUDE_ZGC -+#include "gc/z/zThreadLocalData.hpp" -+#endif ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); + -+// Declaration and definition of StubGenerator (no .hpp file). -+// For a more detailed description of the stub routine structure -+// see the comment in stubRoutines.hpp ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. ++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); + -+#undef __ -+#define __ _masm-> ++ BLOCK_COMMENT("arraycopy_range_checks done"); ++ } + -+#ifdef PRODUCT -+#define BLOCK_COMMENT(str) /* nothing */ -+#else -+#define BLOCK_COMMENT(str) __ block_comment(str) -+#endif ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. 
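// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the unsafe-copy stub described
// just above, and emitted by generate_unsafe_copy() below, ORs the source
// address, destination address and byte count together and tests the low
// bits of the result to pick the widest copy loop that all three values
// permit.  The C++ below restates that selection; the enum and function
// names are the editor's own.
#include <cstddef>
#include <cstdint>

enum class CopyKind { Byte, Short, Int, Long };

static CopyKind pick_unsafe_copy_kind(const void* src, void* dst,
                                      size_t byte_count) {
  uintptr_t bits = reinterpret_cast<uintptr_t>(src) |
                   reinterpret_cast<uintptr_t>(dst) |
                   static_cast<uintptr_t>(byte_count);
  if ((bits & 7) == 0) return CopyKind::Long;   // all 8-byte aligned
  if ((bits & 3) == 0) return CopyKind::Int;    // all 4-byte aligned
  if ((bits & 1) == 0) return CopyKind::Short;  // all 2-byte aligned
  return CopyKind::Byte;
}
// The stub then shifts the byte count right by log2(element size) before
// jumping to the chosen copy entry, so each loop sees an element count.
// --------------------------- end editor's sketch ---------------------------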
++ // ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + -+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+// Stub Code definitions ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); + -+class StubGenerator: public StubCodeGenerator { -+ private: ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); + -+#ifdef PRODUCT -+#define inc_counter_np(counter) ((void)0) -+#else -+ void inc_counter_np_(int& counter) { -+ __ la(t1, ExternalAddress((address)&counter)); -+ __ lwu(t0, Address(t1, 0)); -+ __ addiw(t0, t0, 1); -+ __ sw(t0, Address(t1, 0)); ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); ++ ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); ++ ++ return start; + } -+#define inc_counter_np(counter) \ -+ BLOCK_COMMENT("inc_counter " #counter); \ -+ inc_counter_np_(counter); -+#endif + -+ // Call stubs are used to call Java from C -+ // -+ // Arguments: -+ // c_rarg0: call wrapper address address -+ // c_rarg1: result address -+ // c_rarg2: result type BasicType -+ // c_rarg3: method Method* -+ // c_rarg4: (interpreter) entry point address -+ // c_rarg5: parameters intptr_t* -+ // c_rarg6: parameter size (in words) int -+ // c_rarg7: thread Thread* -+ // -+ // There is no return from the stub itself as any Java result -+ // is written to result + // -+ // we save x1 (ra) as the return PC at the base of the frame and -+ // link x8 (fp) below it as the frame pointer installing sp (x2) -+ // into fp. ++ // Generate generic array copy stubs + // -+ // we save x10-x17, which accounts for all the c arguments. ++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) + // -+ // TODO: strictly do we need to save them all? they are treated as -+ // volatile by C so could we omit saving the ones we are going to -+ // place in global registers (thread? method?) or those we only use -+ // during setup of the Java call? ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count + // -+ // we don't need to save x5 which C uses as an indirect result location -+ // return register. 
-+ // -+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as -+ // volatile -+ // -+ // we save x18-x27 which Java uses as temporary registers and C -+ // expects to be callee-save -+ // -+ // so the stub frame looks like this when we enter Java code -+ // -+ // [ return_from_Java ] <--- sp -+ // [ argument word n ] -+ // ... -+ // -22 [ argument word 1 ] -+ // -21 [ saved x27 ] <--- sp_after_call -+ // -20 [ saved x26 ] -+ // -19 [ saved x25 ] -+ // -18 [ saved x24 ] -+ // -17 [ saved x23 ] -+ // -16 [ saved x22 ] -+ // -15 [ saved x21 ] -+ // -14 [ saved x20 ] -+ // -13 [ saved x19 ] -+ // -12 [ saved x18 ] -+ // -11 [ saved x9 ] -+ // -10 [ call wrapper (x10) ] -+ // -9 [ result (x11) ] -+ // -8 [ result type (x12) ] -+ // -7 [ method (x13) ] -+ // -6 [ entry point (x14) ] -+ // -5 [ parameters (x15) ] -+ // -4 [ parameter size (x16) ] -+ // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] -+ // 0 [ ] <--- fp == saved sp (x2) ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; + -+ // Call stub stack layout word offsets from fp -+ enum call_stub_layout { -+ sp_after_call_off = -21, ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; + -+ x27_off = -21, -+ x26_off = -20, -+ x25_off = -19, -+ x24_off = -18, -+ x23_off = -17, -+ x22_off = -16, -+ x21_off = -15, -+ x20_off = -14, -+ x19_off = -13, -+ x18_off = -12, -+ x9_off = -11, ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; + -+ call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, -+ }; ++ __ align(CodeEntryAlignment); + -+ address generate_call_stub(address& return_address) { -+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && -+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, -+ "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", name); + -+ StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + -+ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ const Address call_wrapper (fp, call_wrapper_off * wordSize); -+ const Address result (fp, result_off * wordSize); -+ const Address result_type (fp, result_type_off * wordSize); -+ const Address method (fp, method_off * wordSize); -+ const Address entry_point (fp, entry_point_off * wordSize); -+ const Address parameters (fp, parameters_off * wordSize); -+ const Address parameter_size(fp, parameter_size_off * wordSize); ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); + -+ const Address thread (fp, thread_off * wordSize); ++ 
//----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // + -+ const Address x27_save (fp, x27_off * wordSize); -+ const Address x26_save (fp, x26_off * wordSize); -+ const Address x25_save (fp, x25_off * wordSize); -+ const Address x24_save (fp, x24_off * wordSize); -+ const Address x23_save (fp, x23_off * wordSize); -+ const Address x22_save (fp, x22_off * wordSize); -+ const Address x21_save (fp, x21_off * wordSize); -+ const Address x20_save (fp, x20_off * wordSize); -+ const Address x19_save (fp, x19_off * wordSize); -+ const Address x18_save (fp, x18_off * wordSize); ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); + -+ const Address x9_save (fp, x9_off * wordSize); ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // stub code ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); + -+ address riscv_entry = __ pc(); ++ // if [dst_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ // set up frame and move sp to end of save area -+ __ enter(); -+ __ addi(sp, fp, sp_after_call_off * wordSize); ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper + -+ // save register parameters and Java temporary/global registers -+ // n.b. we save thread even though it gets installed in -+ // xthread because we want to sanity check tp later -+ __ sd(c_rarg7, thread); -+ __ sw(c_rarg6, parameter_size); -+ __ sd(c_rarg5, parameters); -+ __ sd(c_rarg4, entry_point); -+ __ sd(c_rarg3, method); -+ __ sd(c_rarg2, result_type); -+ __ sd(c_rarg1, result); -+ __ sd(c_rarg0, call_wrapper); ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. 
sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); + -+ __ sd(x9, x9_save); ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif + -+ __ sd(x18, x18_save); -+ __ sd(x19, x19_save); -+ __ sd(x20, x20_save); -+ __ sd(x21, x21_save); -+ __ sd(x22, x22_save); -+ __ sd(x23, x23_save); -+ __ sd(x24, x24_save); -+ __ sd(x25, x25_save); -+ __ sd(x26, x26_save); -+ __ sd(x27, x27_save); ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // + -+ // install Java thread in global register now we have saved -+ // whatever value it held -+ __ mv(xthread, c_rarg7); ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); + -+ // And method -+ __ mv(xmethod, c_rarg3); ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); + -+ // set up the heapbase register -+ __ reinit_heapbase(); ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); ++ ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); + ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
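// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): a plain-C++ reading of the
// Klass::layout_helper() word as the generic copy stub uses it, following the
// field diagram in the comment above (array_tag | header_size | element_type
// | log2_element_size).  Array layout helpers are negative, which is why the
// stub tests the sign bit to reject non-arrays.  The struct and function
// names are the editor's own, and the masks follow the diagram rather than
// the HotSpot constants.
#include <cstdint>

struct ArrayLayout {
  unsigned array_tag;          // 0x3 = typeArray, 0x2 = objArray (per the comment)
  unsigned header_size;        // array header size in bytes, used as the data offset
  unsigned element_type;       // BasicType of the elements
  unsigned log2_element_size;  // 0..3, i.e. exact_log2(element size in bytes)
};

static bool is_array_layout(int32_t lh) {
  return lh < 0;               // sign bit set <=> array tag present
}

static ArrayLayout decode_array_layout(int32_t lh) {
  uint32_t bits = static_cast<uint32_t>(lh);
  ArrayLayout out;
  out.array_tag         = (bits >> 30) & 0x3;
  out.header_size       = (bits >> 16) & 0xff;
  out.element_type      = (bits >>  8) & 0xff;
  out.log2_element_size =  bits        & 0x7;
  return out;
}
// --------------------------- end editor's sketch ---------------------------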
+#ifdef ASSERT -+ // make sure we have no pending exceptions + { ++ BLOCK_COMMENT("assert primitive array {"); + Label L; -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ beqz(t0, L); -+ __ stop("StubRoutines::call_stub: entered with pending exception"); -+ __ BIND(L); ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); + } +#endif -+ // pass parameters if any -+ __ mv(esp, sp); -+ __ slli(t0, c_rarg6, LogBytesPerWord); -+ __ sub(t0, sp, t0); // Move SP out of the way -+ __ andi(sp, t0, -2 * wordSize); + -+ BLOCK_COMMENT("pass parameters if any"); -+ Label parameters_done; -+ // parameter count is still in c_rarg6 -+ // and parameter pointer identifying param 1 is in c_rarg5 -+ __ beqz(c_rarg6, parameters_done); ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ address loop = __ pc(); -+ __ ld(t0, c_rarg5, 0); -+ __ addi(c_rarg5, c_rarg5, wordSize); -+ __ addi(c_rarg6, c_rarg6, -1); -+ __ push_reg(t0); -+ __ bgtz(c_rarg6, loop); ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // + -+ __ BIND(parameters_done); ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size + -+ // call Java entry -- passing methdoOop, and current sp -+ // xmethod: Method* -+ // x30: sender sp -+ BLOCK_COMMENT("call Java function"); -+ __ mv(x30, sp); -+ __ jalr(c_rarg4); ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset + -+ // save current address for use by exception handling code ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); + -+ return_address = __ pc(); ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count + -+ // store result depending on type (everything that is not -+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) -+ // n.b. this assumes Java returns an integral result in x10 -+ // and a floating result in j_farg0 -+ __ ld(j_rarg2, result); -+ Label is_long, is_float, is_double, exit; -+ __ ld(j_rarg1, result_type); -+ __ li(t0, (u1)T_OBJECT); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_LONG); -+ __ beq(j_rarg1, t0, is_long); -+ __ li(t0, (u1)T_FLOAT); -+ __ beq(j_rarg1, t0, is_float); -+ __ li(t0, (u1)T_DOUBLE); -+ __ beq(j_rarg1, t0, is_double); ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. + -+ // handle T_INT case -+ __ sw(x10, Address(j_rarg2)); ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); + -+ __ BIND(exit); ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. 
++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); + -+ // pop parameters -+ __ addi(esp, fp, sp_after_call_off * wordSize); ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); ++ ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); + ++ __ BIND(L_copy_longs); +#ifdef ASSERT -+ // verify that threads correspond + { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ BIND(S); -+ __ stop("StubRoutines::call_stub: threads must correspond"); -+ __ BIND(L); ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); + } +#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); + -+ // restore callee-save registers -+ __ ld(x27, x27_save); -+ __ ld(x26, x26_save); -+ __ ld(x25, x25_save); -+ __ ld(x24, x24_save); -+ __ ld(x23, x23_save); -+ __ ld(x22, x22_save); -+ __ ld(x21, x21_save); -+ __ ld(x20, x20_save); -+ __ ld(x19, x19_save); -+ __ ld(x18, x18_save); -+ -+ __ ld(x9, x9_save); ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] + -+ __ ld(c_rarg0, call_wrapper); -+ __ ld(c_rarg1, result); -+ __ ld(c_rarg2, result_type); -+ __ ld(c_rarg3, method); -+ __ ld(c_rarg4, entry_point); -+ __ ld(c_rarg5, parameters); -+ __ ld(c_rarg6, parameter_size); -+ __ ld(c_rarg7, thread); ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality + -+ // leave frame and return to caller -+ __ leave(); -+ __ ret(); ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); + -+ // handle return types different from T_INT ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); + -+ __ BIND(is_long); -+ __ sd(x10, Address(j_rarg2, 0)); -+ __ j(exit); ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. 
++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); + -+ __ BIND(is_float); -+ __ fsw(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); + -+ __ BIND(is_double); -+ __ fsd(j_farg0, Address(j_rarg2, 0), t0); -+ __ j(exit); ++ __ load_klass(dst_klass, dst); // reload + -+ return start; -+ } ++ // Marshal the base address arguments now, freeing registers. ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, length, zr); // length (reloaded) ++ const Register sco_temp = c_rarg3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, ++ dst_klass, scratch_src_klass); + -+ // Return point for a Java call if there's an exception thrown in -+ // Java code. The exception is caught and transformed into a -+ // pending exception stored in JavaThread that can be tested from -+ // within the VM. -+ // -+ // Note: Usually the parameters are removed by the callee. In case -+ // of an exception crossing an activation frame boundary, that is -+ // not the case if the callee is compiled code => need to setup the -+ // sp. -+ // -+ // x10: exception oop ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+ address generate_catch_exception() { -+ StubCodeMark mark(this, "StubRoutines", "catch_exception"); -+ address start = __ pc(); ++ // Smashes t0, t1 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); + -+ // same as in generate_call_stub(): -+ const Address thread(fp, thread_off * wordSize); ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld(dst_klass, Address(dst_klass, ek_offset)); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); + -+#ifdef ASSERT -+ // verify that threads correspond -+ { -+ Label L, S; -+ __ ld(t0, thread); -+ __ bne(xthread, t0, S); -+ __ get_thread(t0); -+ __ beq(xthread, t0, L); -+ __ bind(S); -+ __ stop("StubRoutines::catch_exception: threads must correspond"); -+ __ bind(L); ++ // the checkcast_copy loop needs two extra arguments: ++ assert(c_rarg3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_copy_entry. ++ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass ++ __ j(RuntimeAddress(checkcast_copy_entry)); + } -+#endif -+ -+ // set pending exception -+ __ verify_oop(x10); -+ -+ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ mv(t0, (address)__FILE__); -+ __ sd(t0, Address(xthread, Thread::exception_file_offset())); -+ __ mv(t0, (int)__LINE__); -+ __ sw(t0, Address(xthread, Thread::exception_line_offset())); + -+ // complete return to VM -+ assert(StubRoutines::_call_stub_return_address != NULL, -+ "_call_stub_return_address must have been generated before"); -+ __ j(StubRoutines::_call_stub_return_address); ++ __ BIND(L_failed); ++ __ mv(x10, -1); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + + return start; + } + -+ // Continuation point for runtime calls returning with a pending -+ // exception. 
The pending exception check happened in the runtime -+ // or native call stub. The pending exception in Thread is -+ // converted into a Java-level exception. + // -+ // Contract with Java-level exception handlers: -+ // x10: exception -+ // x13: throwing pc ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. + // -+ // NOTE: At entry of this stub, exception-pc must be in RA !! -+ -+ // NOTE: this is always used as a jump target within generated code -+ // so it just needs to be generated code with no x86 prolog -+ -+ address generate_forward_exception() { -+ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char* name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + -+ // Upon entry, RA points to the return address returning into -+ // Java (interpreted or compiled) code; i.e., the return address -+ // becomes the throwing pc. -+ // -+ // Arguments pushed before the runtime call are still on the stack -+ // but the exception handler will reset the stack pointer -> -+ // ignore them. A potential result in registers can be ignored as -+ // well. ++ BLOCK_COMMENT("Entry:"); + -+#ifdef ASSERT -+ // make sure this code is only executed if there is a pending exception -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ stop("StubRoutines::forward exception: no pending exception (1)"); -+ __ bind(L); -+ } -+#endif ++ const Register to = c_rarg0; // source array address ++ const Register value = c_rarg1; // value ++ const Register count = c_rarg2; // elements count + -+ // compute exception handler into x9 ++ const Register bz_base = x28; // base for block_zero routine ++ const Register cnt_words = x29; // temp register ++ const Register tmp_reg = t1; + -+ // call the VM to find the handler address associated with the -+ // caller address. pass thread in x10 and caller pc (ret address) -+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on -+ // the stack. -+ __ mv(c_rarg1, ra); -+ // ra will be trashed by the VM call so we move it to x9 -+ // (callee-saved) because we also need to pass it to the handler -+ // returned by this call. -+ __ mv(x9, ra); -+ BLOCK_COMMENT("call exception_handler_for_return_address"); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, c_rarg1); -+ // we should not really care that ra is no longer the callee -+ // address. we saved the value the handler needs in x9 so we can -+ // just copy it to x13. however, the C2 handler will push its own -+ // frame and then calls into the VM and the VM code asserts that -+ // the PC for the frame above the handler belongs to a compiled -+ // Java method. So, we restore ra here to satisfy that assert. 
-+ __ mv(ra, x9); -+ // setup x10 & x13 & clear pending exception -+ __ mv(x13, x9); -+ __ mv(x9, x10); -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+#ifdef ASSERT -+ // make sure exception is set -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("StubRoutines::forward exception: no pending exception (2)"); -+ __ bind(L); -+ } -+#endif -+ -+ // continue at exception handler -+ // x10: exception -+ // x13: throwing pc -+ // x9: exception handler -+ __ verify_oop(x10); -+ __ jr(x9); ++ __ enter(); + -+ return start; -+ } ++ Label L_fill_elements, L_exit1; + -+ // Non-destructive plausibility checks for oops -+ // -+ // Arguments: -+ // x10: oop to verify -+ // t0: error message -+ // -+ // Stack after saving c_rarg3: -+ // [tos + 0]: saved c_rarg3 -+ // [tos + 1]: saved c_rarg2 -+ // [tos + 2]: saved ra -+ // [tos + 3]: saved t1 -+ // [tos + 4]: saved x10 -+ // [tos + 5]: saved t0 -+ address generate_verify_oop() { ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; + -+ StubCodeMark mark(this, "StubRoutines", "verify_oop"); -+ address start = __ pc(); ++ // Zero extend value ++ // 8 bit -> 16 bit ++ __ andi(value, value, 0xff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 8); ++ __ orr(value, value, tmp_reg); + -+ Label exit, error; ++ // 16 bit -> 32 bit ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 ++ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ // Zero extend value ++ // 16 bit -> 32 bit ++ __ andi(value, value, 0xffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); + -+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); -+ __ ld(c_rarg3, Address(c_rarg2)); -+ __ add(c_rarg3, c_rarg3, 1); -+ __ sd(c_rarg3, Address(c_rarg2)); ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; + -+ // object is in x10 -+ // make sure object is 'reasonable' -+ __ beqz(x10, exit); // if obj is NULL it is OK ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } + -+#if INCLUDE_ZGC -+ if (UseZGC) { -+ // Check if mask is good. -+ // verifies that ZAddressBadMask & x10 == 0 -+ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ bnez(c_rarg2, error); ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(t0, to, 1); ++ __ beqz(t0, L_skip_align1); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ addiw(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(t0, to, 2); ++ __ beqz(t0, L_skip_align2); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ addiw(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. 
++ __ andi(t0, to, 4); ++ __ beqz(t0, L_skip_align4); ++ __ sw(value, Address(to, 0)); ++ __ addi(to, to, 4); ++ __ addiw(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } + } -+#endif + -+ // Check if the oop is in the right area of memory -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); -+ __ andr(c_rarg2, x10, c_rarg3); -+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ // ++ // Fill large chunks ++ // ++ __ srliw(cnt_words, count, 3 - shift); // number of words + -+ // Compare c_rarg2 and c_rarg3. -+ __ bne(c_rarg2, c_rarg3, error); ++ // 32 bit -> 64 bit ++ __ andi(value, value, 0xffffffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 32); ++ __ orr(value, value, tmp_reg); + -+ // make sure klass is 'reasonable', which is not zero. -+ __ load_klass(x10, x10); // get klass -+ __ beqz(x10, error); // if klass is NULL it is broken ++ __ slli(tmp_reg, cnt_words, 3 - shift); ++ __ subw(count, count, tmp_reg); ++ { ++ __ fill_words(to, cnt_words, value); ++ } + -+ // return if everything seems ok -+ __ bind(exit); ++ // Remaining count is less than 8 bytes. Fill it by a single store. ++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ __ beqz(count, L_exit1); ++ __ shadd(to, count, to, tmp_reg, shift); // points to the end ++ __ sd(value, Address(to, -8)); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ ret(); ++ } + -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_2); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 4); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_SHORT: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_INT: ++ __ beqz(count, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); + __ ret(); ++ return start; ++ } + -+ // handle errors -+ __ bind(error); -+ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 ++ void generate_arraycopy_stubs() { ++ address entry = NULL; ++ address entry_jbyte_arraycopy = NULL; ++ address entry_jshort_arraycopy = NULL; ++ address entry_jint_arraycopy = NULL; ++ address entry_oop_arraycopy = NULL; ++ address entry_jlong_arraycopy = NULL; ++ address entry_checkcast_arraycopy = NULL; + -+ __ pusha(); -+ // debug(char* msg, int64_t pc, int64_t regs[]) -+ __ mv(c_rarg0, t0); // pass address of error message -+ __ mv(c_rarg1, ra); // pass return address -+ __ mv(c_rarg2, sp); // pass address of regs on stack -+#ifndef PRODUCT -+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); -+#endif -+ BLOCK_COMMENT("call MacroAssembler::debug"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); -+ __ jalr(x1, t0, offset); -+ __ ebreak(); ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, 
copy_backwards); + -+ return start; -+ } ++ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + -+ // The inner part of zero_words(). -+ // -+ // Inputs: -+ // x28: the HeapWord-aligned base address of an array to zero. -+ // x29: the count in HeapWords, x29 > 0. -+ // -+ // Returns x28 and x29, adjusted for the caller to clear. -+ // x28: the base address of the tail of words left to clear. -+ // x29: the number of words in the tail. -+ // x29 < MacroAssembler::zero_words_block_size. ++ //*** jbyte ++ // Always need aligned and unaligned versions ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, ++ "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, ++ &entry_jbyte_arraycopy, ++ "jbyte_arraycopy"); ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, ++ "arrayof_jbyte_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, ++ "arrayof_jbyte_arraycopy"); + -+ address generate_zero_blocks() { -+ Label done; ++ //*** jshort ++ // Always need aligned and unaligned versions ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, ++ "jshort_disjoint_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, ++ &entry_jshort_arraycopy, ++ "jshort_arraycopy"); ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, ++ "arrayof_jshort_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, ++ "arrayof_jshort_arraycopy"); + -+ const Register base = x28, cnt = x29; ++ //*** jint ++ // Aligned versions ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, ++ "arrayof_jint_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, ++ "arrayof_jint_arraycopy"); ++ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. ++ // entry_jint_arraycopy always points to the unaligned version ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, ++ "jint_disjoint_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, ++ &entry_jint_arraycopy, ++ "jint_arraycopy"); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); -+ address start = __ pc(); ++ //*** jlong ++ // It is always aligned ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, ++ "arrayof_jlong_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, ++ "arrayof_jlong_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; ++ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; + ++ //*** oops + { -+ // Clear the remaining blocks. 
-+ Label loop; -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bltz(cnt, done); -+ __ bind(loop); -+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { -+ __ sd(zr, Address(base, 0)); -+ __ add(base, base, 8); -+ } -+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); -+ __ bgez(cnt, loop); -+ __ bind(done); -+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); ++ // With compressed oops we need unaligned versions; notice that ++ // we overwrite entry_oop_arraycopy. ++ bool aligned = !UseCompressedOops; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", ++ /*dest_uninitialized*/false); ++ StubRoutines::_arrayof_oop_arraycopy ++ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", ++ /*dest_uninitialized*/false); ++ // Aligned versions without pre-barriers ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_arrayof_oop_arraycopy_uninit ++ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); + } + -+ __ ret(); ++ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; ++ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; ++ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; + -+ return start; -+ } ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, ++ /*dest_uninitialized*/true); + -+ typedef enum { -+ copy_forwards = 1, -+ copy_backwards = -1 -+ } copy_direction; + -+ // Bulk copy of blocks of 8 words. -+ // -+ // count is a count of words. -+ // -+ // Precondition: count >= 8 -+ // -+ // Postconditions: -+ // -+ // The least significant bit of count contains the remaining count -+ // of words to copy. The rest of count is trash. 
-+ // -+ // s and d are adjusted to point to the remaining words to copy -+ // -+ void generate_copy_longs(Label &start, Register s, Register d, Register count, -+ copy_direction direction) { -+ int unit = wordSize * direction; -+ int bias = wordSize; ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_jlong_arraycopy); + -+ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, -+ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_oop_arraycopy, ++ entry_jlong_arraycopy, ++ entry_checkcast_arraycopy); + -+ const Register stride = x30; ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } + -+ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, -+ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); -+ assert_different_registers(s, d, count, t0); ++ // Safefetch stubs. ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue) ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) ++ // ++ // arguments: ++ // c_rarg0 = adr ++ // c_rarg1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); ++ StubCodeMark mark(this, "StubRoutines", name); + -+ Label again, drain; -+ const char* stub_name = NULL; -+ if (direction == copy_forwards) { -+ stub_name = "forward_copy_longs"; -+ } else { -+ stub_name = "backward_copy_longs"; -+ } -+ StubCodeMark mark(this, "StubRoutines", stub_name); -+ __ align(CodeEntryAlignment); -+ __ bind(start); ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); + -+ if (direction == copy_forwards) { -+ __ sub(s, s, bias); -+ __ sub(d, d, bias); ++ // Load *adr into c_rarg1, may fault. 
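++ // If the load below faults, the VM's signal handler is expected to recognize
++ // *fault_pc and resume execution at *continuation_pc; c_rarg1 still holds
++ // errValue at that point, so the stub simply returns it.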
++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ case 8: ++ // int64_t ++ __ ld(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ default: ++ ShouldNotReachHere(); + } + -+#ifdef ASSERT -+ // Make sure we are never given < 8 words -+ { -+ Label L; ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ mv(x10, c_rarg1); ++ __ ret(); ++ } + -+ __ li(t0, 8); -+ __ bge(count, t0, L); -+ __ stop("genrate_copy_longs called with < 8 words"); -+ __ bind(L); -+ } -+#endif ++ // code for comparing 16 bytes of strings with same encoding ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; ++ __ ld(tmp5, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ ld(cnt1, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF1); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ bnez(tmp4, DIFF2); ++ } + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); -+ __ addi(s, s, 8 * unit); ++ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; ++ __ ld(tmpL, Address(strL)); ++ __ addi(strL, strL, 8); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_lo32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ __ sub(count, count, 16); -+ __ bltz(count, drain); ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); ++ __ inflate_hi32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, tmpU, t0); ++ __ bnez(tmp, DIFF1); ++ } + -+ __ bind(again); ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ address generate_compare_long_string_different_encoding(bool isLU) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); -+ -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ ld(tmp_reg4, Address(s, 5 * unit)); -+ __ ld(tmp_reg5, Address(s, 6 * unit)); -+ __ ld(tmp_reg6, Address(s, 7 * unit)); -+ __ ld(tmp_reg7, Address(s, 8 * unit)); -+ -+ __ addi(s, s, 8 * unit); -+ __ addi(d, d, 8 * unit); -+ -+ __ sub(count, count, 8); -+ __ bgez(count, again); ++ // cnt2 == amount of characters left to compare ++ // Check already loaded first 4 symbols ++ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); ++ __ mv(isLU ? tmp1 : tmp2, tmp3); ++ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); ++ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. ++ __ push_reg(spilled_regs, sp); + -+ // Drain -+ __ bind(drain); ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } ++ __ xorr(tmp3, tmp1, tmp2); ++ __ mv(tmp5, tmp2); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ sd(tmp_reg4, Address(d, 5 * unit)); -+ __ sd(tmp_reg5, Address(d, 6 * unit)); -+ __ sd(tmp_reg6, Address(d, 7 * unit)); -+ __ sd(tmp_reg7, Address(d, 8 * unit)); -+ __ addi(d, d, 8 * unit); ++ Register strU = isLU ? str2 : str1, ++ strL = isLU ? str1 : str2, ++ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison ++ tmpL = isLU ? 
tmp1 : tmp5; // where to keep L for comparison + -+ { -+ Label L1, L2; -+ __ andi(t0, count, 4); -+ __ beqz(t0, L1); ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ __ ld(tmp_reg0, Address(s, 1 * unit)); -+ __ ld(tmp_reg1, Address(s, 2 * unit)); -+ __ ld(tmp_reg2, Address(s, 3 * unit)); -+ __ ld(tmp_reg3, Address(s, 4 * unit)); -+ __ addi(s, s, 4 * unit); ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); // smaller loop ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); ++ __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. ++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); + -+ __ sd(tmp_reg0, Address(d, 1 * unit)); -+ __ sd(tmp_reg1, Address(d, 2 * unit)); -+ __ sd(tmp_reg2, Address(d, 3 * unit)); -+ __ sd(tmp_reg3, Address(d, 4 * unit)); -+ __ addi(d, d, 4 * unit); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(CALCULATE_DIFFERENCE); ++ __ ctzc_bit(tmp4, tmp3); ++ __ srl(tmp1, tmp1, tmp4); ++ __ srl(tmp5, tmp5, tmp4); ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ sub(result, tmp1, tmp5); ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; ++ } + -+ __ bind(L1); ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ // x31 = tmp4 ++ address generate_compare_long_string_same_encoding(bool isLL) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLL ? ++ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, ++ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); + -+ if (direction == copy_forwards) { -+ __ addi(s, s, bias); -+ __ addi(d, d, bias); ++ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used ++ // update cnt2 counter with already loaded 8 bytes ++ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); ++ // update pointers, because of previous read ++ __ add(str1, str1, wordSize); ++ __ add(str2, str2, wordSize); ++ // less than 16 bytes left? ++ __ sub(cnt2, cnt2, isLL ? 
16 : 8); ++ __ push_reg(spilled_regs, sp); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); ++ compare_string_16_bytes_same(DIFF, DIFF2); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ bind(TAIL); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); ++ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ blez(cnt2, CHECK_LAST); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, 8); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); ++ __ bind(CHECK_LAST); ++ if (!isLL) { ++ __ add(cnt2, cnt2, cnt2); // now in bytes + } -+ -+ __ andi(t0, count, 2); -+ __ beqz(t0, L2); -+ if (direction == copy_backwards) { -+ __ addi(s, s, 2 * unit); -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(d, d, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ beqz(tmp4, LENGTH_DIFF); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(DIFF2); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp5, tmp5, tmp3); ++ __ srl(cnt1, cnt1, tmp3); ++ if (isLL) { ++ __ andi(tmp5, tmp5, 0xFF); ++ __ andi(cnt1, cnt1, 0xFF); + } else { -+ __ ld(tmp_reg0, Address(s)); -+ __ ld(tmp_reg1, Address(s, wordSize)); -+ __ addi(s, s, 2 * unit); -+ __ sd(tmp_reg0, Address(d)); -+ __ sd(tmp_reg1, Address(d, wordSize)); -+ __ addi(d, d, 2 * unit); ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ andi(cnt1, cnt1, 0xFFFF); + } -+ __ bind(L2); -+ } ++ __ sub(result, tmp5, cnt1); ++ __ j(LENGTH_DIFF); ++ __ bind(DIFF); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp1, tmp1, tmp3); ++ __ srl(tmp2, tmp2, tmp3); ++ if (isLL) { ++ __ andi(tmp1, tmp1, 0xFF); ++ __ andi(tmp2, tmp2, 0xFF); ++ } else { ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp2, tmp2, 0xFFFF); ++ } ++ __ sub(result, tmp1, tmp2); ++ __ j(LENGTH_DIFF); ++ __ bind(LAST_CHECK_AND_LENGTH_DIFF); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ bind(LENGTH_DIFF); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; ++ } + -+ __ ret(); ++ void generate_compare_long_strings() { ++ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); ++ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); + } + -+ Label copy_f, copy_b; ++ // x10 result ++ // x11 src ++ // x12 src count ++ // x13 pattern ++ // x14 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); + -+ // All-singing all-dancing memory copy. -+ // -+ // Copy count units of memory from s to d. The size of a unit is -+ // step, which can be positive or negative depending on the direction -+ // of copy. 
If is_aligned is false, we align the source address. -+ // -+ /* -+ * if (is_aligned) { -+ * goto copy_8_bytes; -+ * } -+ * bool is_backwards = step < 0; -+ * int granularity = uabs(step); -+ * count = count * granularity; * count bytes -+ * -+ * if (is_backwards) { -+ * s += count; -+ * d += count; -+ * } -+ * -+ * count limit maybe greater than 16, for better performance -+ * if (count < 16) { -+ * goto copy_small; -+ * } -+ * -+ * if ((dst % 8) == (src % 8)) { -+ * aligned; -+ * goto copy8; -+ * } -+ * -+ * copy_small: -+ * load element one by one; -+ * done; -+ */ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ // parameters ++ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; ++ // temporary registers ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; ++ // redefinitions ++ Register ch1 = x28, ch2 = x29; ++ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); + -+ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); ++ __ push_reg(spilled_regs, sp); + -+ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { -+ bool is_backward = step < 0; -+ int granularity = uabs(step); ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; + -+ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; -+ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); -+ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); -+ Label loop_forward, loop_backward, done; ++ __ ld(ch1, Address(needle)); ++ __ ld(ch2, Address(haystack)); ++ // src.length - pattern.length ++ __ sub(haystack_len, haystack_len, needle_len); + -+ __ mv(dst, d); -+ __ mv(src, s); -+ __ mv(cnt, count); ++ // first is needle[0] ++ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); ++ __ mul(first, first, mask1); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); ++ if (needle_isL != haystack_isL) { ++ __ mv(tmp, ch1); ++ } ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); ++ __ blez(haystack_len, L_SMALL); + -+ __ bind(loop_forward); -+ __ vsetvli(vl, cnt, sew, Assembler::m8); -+ if (is_backward) { -+ __ bne(vl, cnt, loop_backward); ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } ++ // xorr, sub, orr, notr, andr ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); + -+ __ vlex_v(v0, src, sew); -+ __ sub(cnt, cnt, vl); -+ __ slli(vl, vl, (int)sew); -+ __ add(src, src, vl); ++ // search first char of needle, if success, goto L_HAS_ZERO; ++ __ bnez(match_mask, L_HAS_ZERO); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ bltz(haystack_len, L_POST_LOOP); + -+ __ vsex_v(v0, dst, sew); -+ __ add(dst, dst, vl); -+ __ bnez(cnt, loop_forward); ++ __ bind(L_LOOP); ++ __ ld(ch2, Address(haystack)); ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ __ bnez(match_mask, L_HAS_ZERO); + -+ if (is_backward) { -+ __ j(done); ++ __ bind(L_LOOP_PROCEED); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ bgez(haystack_len, L_LOOP); + -+ __ bind(loop_backward); -+ __ sub(tmp, cnt, vl); -+ __ slli(tmp, tmp, sew); -+ __ add(tmp1, s, tmp); -+ __ vlex_v(v0, tmp1, sew); -+ __ add(tmp2, d, tmp); -+ __ vsex_v(v0, tmp2, sew); -+ __ sub(cnt, cnt, vl); -+ __ bnez(cnt, loop_forward); -+ __ bind(done); -+ } -+ } ++ __ bind(L_POST_LOOP); ++ __ mv(ch2, -wordSize / haystack_chr_size); ++ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check ++ __ ld(ch2, Address(haystack)); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set ++ __ j(L_SMALL_PROCEED); + -+ void copy_memory(bool is_aligned, Register s, Register d, -+ Register count, Register tmp, int step) { -+ if (UseRVV) { -+ return copy_memory_v(s, d, count, tmp, step); ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zeros, -1); // all bits set + -+ bool is_backwards = step < 0; -+ int granularity = uabs(step); ++ __ bind(L_SMALL_PROCEED); ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. 
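++ // Same zero-byte trick as compute_match_mask above: with ch2 ^= first already
++ // applied, the (ch2 - mask1) & ~(ch2 | mask2) sequence below leaves the
++ // 0x80/0x8000 marker set where ch2[i] == first[i], and trailing_zeros (built by
++ // the srl above) clears markers that fall beyond the valid haystack bytes.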
++ __ notr(ch2, ch2); ++ __ andr(match_mask, match_mask, ch2); ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check ++ __ beqz(match_mask, NOMATCH); + -+ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ mv(ch2, wordSize / haystack_chr_size); ++ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + -+ Label same_aligned; -+ Label copy8, copy_small, done; ++ __ bind(L_SMALL_CMP_LOOP); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); + -+ copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (granularity) { -+ case 1 : -+ ld_arr = (copy_insn)&MacroAssembler::lbu; -+ st_arr = (copy_insn)&MacroAssembler::sb; -+ break; -+ case 2 : -+ ld_arr = (copy_insn)&MacroAssembler::lhu; -+ st_arr = (copy_insn)&MacroAssembler::sh; -+ break; -+ case 4 : -+ ld_arr = (copy_insn)&MacroAssembler::lwu; -+ st_arr = (copy_insn)&MacroAssembler::sw; -+ break; -+ case 8 : -+ ld_arr = (copy_insn)&MacroAssembler::ld; -+ st_arr = (copy_insn)&MacroAssembler::sd; -+ break; -+ default : -+ ShouldNotReachHere(); -+ } ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(result, result, 1); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_SMALL_HAS_ZERO_LOOP); + -+ __ beqz(count, done); -+ __ slli(cnt, count, exact_log2(granularity)); -+ if (is_backwards) { -+ __ add(src, s, cnt); -+ __ add(dst, d, cnt); -+ } else { -+ __ mv(src, s); -+ __ mv(dst, d); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ if (is_aligned) { -+ __ addi(tmp, cnt, -8); -+ __ bgez(tmp, copy8); -+ __ j(copy_small); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ __ mv(tmp, 16); -+ __ blt(cnt, tmp, copy_small); ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); ++ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); ++ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) ++ __ sub(result, result, 1); // array index from 0, so result -= 1 + -+ __ xorr(tmp, src, dst); -+ __ andi(tmp, tmp, 0b111); -+ __ bnez(tmp, copy_small); ++ __ bind(L_HAS_ZERO_LOOP); ++ __ mv(needle_len, wordSize / haystack_chr_size); ++ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); ++ // load next 8 bytes from haystack, and increase result index ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + -+ __ bind(same_aligned); -+ __ andi(tmp, src, 0b111); -+ __ beqz(tmp, copy8); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ beqz(cnt, done); -+ __ j(same_aligned); ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); ++ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index ++ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); ++ __ beq(needle_len, ch2, L_CMP_LOOP); + -+ __ bind(copy8); -+ if (is_backwards) { -+ __ addi(src, src, -wordSize); -+ __ addi(dst, dst, -wordSize); -+ } -+ __ ld(tmp3, Address(src)); -+ __ sd(tmp3, Address(dst)); -+ if (!is_backwards) { -+ __ addi(src, src, wordSize); -+ __ addi(dst, dst, wordSize); -+ } -+ __ addi(cnt, cnt, -wordSize); -+ __ addi(tmp4, cnt, -8); -+ __ bgez(tmp4, copy8); // cnt >= 8, do next loop ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_HAS_ZERO_LOOP); + -+ __ beqz(cnt, done); ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ __ bind(copy_small); -+ if (is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ (_masm->*ld_arr)(tmp3, Address(src), t0); -+ (_masm->*st_arr)(tmp3, Address(dst), t0); -+ if (!is_backwards) { -+ __ addi(src, src, step); -+ __ addi(dst, dst, step); -+ } -+ __ addi(cnt, cnt, -granularity); -+ __ bgtz(cnt, copy_small); ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); + -+ __ bind(done); -+ } ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. 
Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) restore needle_len and haystack_len values from "compressed" haystack_len ++ // 3) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); ++ __ andi(result, result, haystack_isL ? -8 : -4); ++ __ slli(tmp, match_mask, haystack_chr_shift); ++ __ sub(haystack, haystack, tmp); ++ __ addw(haystack_len, haystack_len, zr); ++ __ j(L_LOOP_PROCEED); + -+ // Scan over array at a for count oops, verifying each one. -+ // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(size_t size, Register a, Register count, Register temp) { -+ Label loop, end; -+ __ mv(t1, zr); -+ __ slli(t0, count, exact_log2(size)); -+ __ bind(loop); -+ __ bgeu(t1, t0, end); ++ __ align(OptoLoopAlignment); ++ __ bind(NOMATCH); ++ __ mv(result, -1); + -+ __ add(temp, a, t1); -+ if (size == (size_t)wordSize) { -+ __ ld(temp, Address(temp, 0)); -+ __ verify_oop(temp); -+ } else { -+ __ lwu(temp, Address(temp, 0)); -+ __ decode_heap_oop(temp); // calls verify_oop -+ } -+ __ add(t1, t1, size); -+ __ j(loop); -+ __ bind(end); ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; + } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). 
-+ // -+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_reg = RegSet::of(s, d, count); ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); ++ } ++ ++#ifdef COMPILER2 ++ address generate_mulAdd() ++ { + __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = x10; ++ const Register in = x11; ++ const Register offset = x12; ++ const Register len = x13; ++ const Register k = x14; ++ const Register tmp = x28; ++ ++ BLOCK_COMMENT("Entry:"); + __ enter(); ++ __ mul_add(out, in, offset, len, k, tmp); ++ __ leave(); ++ __ ret(); + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ return entry; ++ } + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; -+ } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; -+ } ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - x address ++ * c_rarg1 - x length ++ * c_rarg2 - y address ++ * c_rarg3 - y length ++ * c_rarg4 - z address ++ * c_rarg5 - z length ++ */ ++ address generate_multiplyToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ address entry = __ pc(); + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg); ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register y = x12; ++ const Register ylen = x13; ++ const Register z = x14; ++ const Register zlen = x15; + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); -+ } ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; + -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, size); -+ } ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); -+ } -+ } ++ return entry; ++ } + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); ++ address generate_squareToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "squareToLen"); ++ address entry = __ pc(); ++ ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register z = x12; ++ const Register zlen = x13; ++ const 
Register y = x14; // == x ++ const Register ylen = x15; // == xlen + ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mv(y, x); ++ __ mv(ylen, xlen); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); + __ leave(); -+ __ mv(x10, zr); // return 0 + __ ret(); -+ return start; ++ ++ return entry; + } ++#endif + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // is_oop - true => oop array, so generate store check code -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; -+ RegSet saved_regs = RegSet::of(s, d, count); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); ++#ifdef COMPILER2 ++ class MontgomeryMultiplyGenerator : public MacroAssembler { + -+ if (entry != NULL) { -+ *entry = __ pc(); -+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) -+ BLOCK_COMMENT("Entry:"); -+ } ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, ++ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; + -+ // use fwd copy when (d-s) above_equal (count*size) -+ __ sub(t0, d, s); -+ __ slli(t1, count, exact_log2(size)); -+ __ bgeu(t0, t1, nooverlap_target); ++ RegSet _toSave; ++ bool _squaring; + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = c_rarg0; ++ Pa_base = reg; // Argument registers ++ if (squaring) { ++ Pb_base = Pa_base; ++ } else { ++ Pb_base = ++reg; ++ } ++ Pn_base = ++reg; ++ Rlen= ++reg; ++ inv = ++reg; ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. ++ Pb = ++reg; ++ Pm = ++reg; ++ Pn = ++reg; ++ ++ tmp0 = ++reg; // Three registers which form a ++ tmp1 = ++reg; // triple-precision accumuator. ++ tmp2 = ++reg; ++ ++ Ri = x6; // Inner and outer loop indexes. ++ Rj = x7; ++ ++ Rhi_ab = x28; // Product registers: low and high parts ++ Rlo_ab = x29; // of a*b and m*n. ++ Rhi_mn = x30; ++ Rlo_mn = x31; ++ ++ // x18 and up are callee-saved. 
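++ // x18..x27 are the RISC-V callee-saved s-registers, so the working registers
++ // allocated above (Rm, Rn, Pa/Pb/Pm/Pn, tmp0..tmp2) must be saved and restored
++ // by the stub; Pm_base lies below x18 but is pushed together with them.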
++ _toSave = RegSet::range(x18, reg) + Pm_base; + } -+ if (aligned) { -+ decorators |= ARRAYCOPY_ALIGNED; ++ ++ private: ++ void save_regs() { ++ push_reg(_toSave, sp); + } + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ void restore_regs() { ++ pop_reg(_toSave, sp); ++ } + -+ if (is_oop) { -+ // save regs before copy_memory -+ __ push_reg(RegSet::of(d, count), sp); ++ template ++ void unroll_2(Register count, T block) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(t0, count, 0x1); ++ bnez(t0, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); + } + -+ { -+ // UnsafeCopyMemory page error: continue after ucm -+ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -+ UnsafeCopyMemoryMark ucmm(this, add_entry, true); -+ copy_memory(aligned, s, d, count, t0, -size); ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(tmp, count, 0x1); ++ bnez(tmp, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); + } + -+ if (is_oop) { -+ __ pop_reg(RegSet::of(d, count), sp); -+ if (VerifyOops) { -+ verify_oop_array(size, d, count, t2); ++ void pre1(RegisterOrConstant i) { ++ block_comment("pre1"); ++ // Pa = Pa_base; ++ // Pb = Pb_base + i; ++ // Pm = Pm_base; ++ // Pn = Pn_base + i; ++ // Ra = *Pa; ++ // Rb = *Pb; ++ // Rm = *Pm; ++ // Rn = *Pn; ++ if (i.is_register()) { ++ slli(t0, i.as_register(), LogBytesPerWord); ++ } else { ++ mv(t0, i.as_constant()); ++ slli(t0, t0, LogBytesPerWord); + } -+ } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); -+ __ leave(); -+ __ mv(x10, zr); // return 0 -+ __ ret(); -+ return start; -+ } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_byte_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_byte_copy(). 
-+ // -+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); -+ } ++ mv(Pa, Pa_base); ++ add(Pb, Pb_base, t0); ++ mv(Pm, Pm_base); ++ add(Pn, Pn_base, t0); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, -+ // we let the hardware handle it. The one to eight bytes within words, -+ // dwords or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ ld(Ra, Address(Pa)); ++ ld(Rb, Address(Pb)); ++ ld(Rm, Address(Pm)); ++ ld(Rn, Address(Pn)); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ // Side Effects: -+ // disjoint_short_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_short_copy(). -+ // -+ address generate_disjoint_short_copy(bool aligned, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); -+ } ++ // Zero the m*n result. ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we -+ // let the hardware handle it. The two or four words within dwords -+ // or qwords that span cache line boundaries will still be loaded -+ // and stored atomically. -+ // -+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. 
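++ // step() computes a*b for the current digits, starts loading the next a/b pair,
++ // folds in the m*n product left pending by the previous iteration, then computes
++ // this iteration's m*n (kept pending in Rhi_mn:Rlo_mn), starts loading the next
++ // m/n pair, and finally accumulates a*b into the triple-precision accumulator
++ // tmp0:tmp1:tmp2.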
++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ addi(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ addi(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the ++ // previous iteration. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ // Side Effects: -+ // disjoint_int_copy_entry is set to the no-overlap entry point -+ // used by generate_conjoint_int_oop_copy(). -+ // -+ address generate_disjoint_int_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); -+ } ++ void post1() { ++ block_comment("post1"); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // -+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let -+ // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomicly. -+ // -+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, -+ address* entry, const char* name, -+ bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); -+ } ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); + ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). 
-+ // -+ address generate_disjoint_long_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); -+ } ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_long_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized = false) { -+ const bool not_oop = false; -+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); -+ } ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); ++ stop("broken Montgomery multiply"); ++ bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and an cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp0, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ // Side Effects: -+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the -+ // no-overlap entry point used by generate_conjoint_long_oop_copy(). -+ // -+ address generate_disjoint_oop_copy(bool aligned, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); -+ } ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ // Pa = Pa_base + i-len; ++ // Pb = Pb_base + len; ++ // Pm = Pm_base + i-len; ++ // Pn = Pn_base + len; + -+ // Arguments: -+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes -+ // ignored -+ // name - stub name string -+ // -+ // Inputs: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as size_t, can be zero -+ // -+ address generate_conjoint_oop_copy(bool aligned, -+ address nooverlap_target, address* entry, -+ const char* name, bool dest_uninitialized) { -+ const bool is_oop = true; -+ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); -+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, -+ name, dest_uninitialized); -+ } ++ sub(Rj, i, len); ++ // Rj == i-len + -+ // Helper for generating a dynamic type check. -+ // Smashes t0, t1. 
-+ void generate_type_check(Register sub_klass, -+ Register super_check_offset, -+ Register super_klass, -+ Label& L_success) { -+ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ // Ra as temp register ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + -+ BLOCK_COMMENT("type_check:"); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ add(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ add(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ add(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ add(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); + -+ Label L_miss; ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } + -+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); -+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ sub(Rj, i, len); + -+ // Fall through on failure! -+ __ BIND(L_miss); -+ } ++ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part + -+ // -+ // Generate checkcasting array copy stub -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - element count, treated as ssize_t, can be zero -+ // c_rarg3 - size_t ckoff (super_check_offset) -+ // c_rarg4 - oop ckval (super_klass) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_checkcast_copy(const char* name, address* entry, -+ bool dest_uninitialized = false) { -+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = tmp0; ++ // Rj as temp register ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); ++ sd(tmp0, Address(Rj)); + -+ // Input registers (after setup_arg_regs) -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elementscount -+ const Register ckoff = c_rarg3; // super_check_offset -+ const Register ckval = c_rarg4; // super_klass ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } + -+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); -+ RegSet wb_post_saved_regs = RegSet::of(count); ++ // A carry in tmp0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. ++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (tmp0) ++ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); ++ Label loop, post, again; ++ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now ++ beqz(tmp0, post); { ++ bind(again); { ++ mv(i, zr); ++ mv(cnt, len); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ mv(t0, 1); // set carry flag, i.e. 
no borrow ++ align(16); ++ bind(loop); { ++ notr(Rn, Rn); ++ add(Rm, Rm, t0); ++ add(Rm, Rm, Rn); ++ sltu(t0, Rm, Rn); ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); ++ sd(Rm, Address(Rn)); ++ add(i, i, 1); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ sub(cnt, cnt, 1); ++ } bnez(cnt, loop); ++ addi(tmp0, tmp0, -1); ++ add(tmp0, tmp0, t0); ++ } bnez(tmp0, again); ++ } bind(post); ++ } + -+ // Registers used as temps (x7, x9, x18 are save-on-entry) -+ const Register count_save = x19; // orig elementscount -+ const Register start_to = x18; // destination array start address -+ const Register copied_oop = x7; // actual oop copied -+ const Register r9_klass = x9; // oop._klass ++ // Move memory at s to d, reversing words. ++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ //--------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the two arrays are subtypes of Object[] but the -+ // destination array type is not equal to or a supertype -+ // of the source type. Each element must be separately -+ // checked. ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); ++ mv(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli(tmp1, len, LogBytesPerWord); ++ sub(s, d, tmp1); ++ } ++ // [63...0] -> [31...0][63...32] ++ void reverse1(Register d, Register s, Register tmp) { ++ addi(s, s, -wordSize); ++ ld(tmp, Address(s)); ++ ror_imm(tmp, tmp, 32, t0); ++ sd(tmp, Address(d)); ++ addi(d, d, wordSize); ++ } + -+ assert_different_registers(from, to, count, ckoff, ckval, start_to, -+ copied_oop, r9_klass, count_save); ++ void step_squaring() { ++ // An extra ACC ++ step(); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ void last_squaring(Register i) { ++ Label dont; ++ // if ((i & 1) == 0) { ++ andi(t0, i, 0x1); ++ bnez(t0, dont); { ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } bind(dont); ++ } + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ void extra_step_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+ // Caller of this entry point must set up the argument registers. -+ if (entry != NULL) { -+ *entry = __ pc(); -+ BLOCK_COMMENT("Entry:"); ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); + } + -+ // Empty array: Nothing to do -+ __ beqz(count, L_done); ++ void post1_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + -+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); + -+#ifdef ASSERT -+ BLOCK_COMMENT("assert consistent ckoff/ckval"); -+ // The ckoff and ckval must be mutually consistent, -+ // even though caller generates both. 
-+ { Label L; -+ int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(start_to, Address(ckval, sco_offset)); -+ __ beq(ckoff, start_to, L); -+ __ stop("super_check_offset inconsistent"); -+ __ bind(L); -+ } -+#endif //ASSERT ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); + -+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; -+ bool is_oop = true; -+ if (dest_uninitialized) { -+ decorators |= IS_DEST_UNINITIALIZED; ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and a cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); + } + -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ // use t0 as carry ++ void acc(Register Rhi, Register Rlo, ++ Register tmp0, Register tmp1, Register tmp2) { ++ cad(tmp0, tmp0, Rlo, t0); ++ cadc(tmp1, tmp1, Rhi, t0); ++ adc(tmp2, tmp2, zr, t0); ++ } + -+ // save the original count -+ __ mv(count_save, count); ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements b ++ * c_rarg2 - int array elements n (the modulus) ++ * c_rarg3 - int length ++ * c_rarg4 - int inv ++ * c_rarg5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ // Copy from low to high addresses -+ __ mv(start_to, to); // Save destination array start address -+ __ j(L_load_element); ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // ======== begin loop ======== -+ // (Loop is rotated; its entry is L_load_element.) -+ // Loop control: -+ // for count to 0 do -+ // copied_oop = load_heap_oop(from++) -+ // ... generate_type_check ... -+ // store_heap_oop(to++, copied_oop) -+ // end ++ beqz(Rlen, nothing); + -+ __ align(OptoLoopAlignment); ++ enter(); + -+ __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop -+ __ add(to, to, UseCompressedOops ? 4 : 8); -+ __ sub(count, count, 1); -+ __ beqz(count, L_do_card_marks); ++ // Make room. 
++ mv(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+ // ======== loop entry is here ======== -+ __ BIND(L_load_element); -+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop -+ __ add(from, from, UseCompressedOops ? 4 : 8); -+ __ beqz(copied_oop, L_store_element); ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+ __ load_klass(r9_klass, copied_oop);// query the object klass -+ generate_type_check(r9_klass, ckoff, ckval, L_store_element); -+ // ======== end loop ======== ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+ // It was a real error; we must depend on the caller to finish the job. -+ // Register count = remaining oops, count_orig = total oops. -+ // Emit GC store barriers for the oops we have copied and report -+ // their number to the caller. ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+ __ sub(count, count_save, count); // K = partially copied oop count -+ __ xori(count, count, -1); // report (-1^K) to caller -+ __ beqz(count, L_done_pop); ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Address(Pn_base)); ++ mul(Rlo_mn, Rn, inv); ++ mv(t0, -1); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ stop("broken inverse in Montgomery multiply"); ++ bind(ok); ++ } ++#endif + -+ __ BIND(L_do_card_marks); -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); ++ mv(Pm_base, Ra); + -+ __ bind(L_done_pop); -+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); -+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ __ bind(L_done); -+ __ mv(x10, count); -+ __ leave(); -+ __ ret(); ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bge(Ri, Rlen, end); + -+ return start; -+ } ++ bind(loop); ++ pre1(Ri); + -+ // Perform range checks on the proposed arraycopy. -+ // Kills temp, but nothing else. -+ // Also, clean the sign bits of src_pos and dst_pos. 
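Taken together, pre1/step/post1 and pre2/step/post2 in generate_multiply() above walk the columns of a word-serial Montgomery multiplication. A rough C outline of that structure, reusing the macc() helper sketched earlier and assuming inv is the negated inverse of n[0] mod 2^64 (which is what the PRODUCT assert checks); this is a sketch of the column structure, not the stub's exact register schedule, and len counts 64-bit limbs (the stub halves the incoming int length):

  #include <stdint.h>

  // Montgomery product: m = a*b*R^-1 (mod n), with R = 2^(64*len).
  static void mont_mul(const uint64_t *a, const uint64_t *b, const uint64_t *n,
                       uint64_t *m, uint64_t inv, int len) {
    uint64_t t0 = 0, t1 = 0, t2 = 0;
    for (int i = 0; i < len; i++) {               // lower columns
      for (int j = 0; j < i; j++) {
        macc(a[j], b[i - j], &t0, &t1, &t2);
        macc(m[j], n[i - j], &t0, &t1, &t2);
      }
      macc(a[i], b[0], &t0, &t1, &t2);
      m[i] = t0 * inv;                            // forces this column to 0 mod 2^64
      macc(m[i], n[0], &t0, &t1, &t2);            // t0 is now 0 (the PRODUCT assert)
      t0 = t1; t1 = t2; t2 = 0;                   // shift the accumulator down one word
    }
    for (int i = len; i < 2 * len; i++) {         // upper columns
      for (int j = i - len + 1; j < len; j++) {
        macc(a[j], b[i - j], &t0, &t1, &t2);
        macc(m[j], n[i - j], &t0, &t1, &t2);
      }
      m[i - len] = t0;
      t0 = t1; t1 = t2; t2 = 0;
    }
    while (t0 != 0) {                             // normalize(): subtract n while a carry remains
      uint64_t borrow = 0;
      for (int j = 0; j < len; j++) {
        unsigned __int128 d = (unsigned __int128)m[j] - n[j] - borrow;
        m[j] = (uint64_t)d;
        borrow = (uint64_t)(d >> 64) & 1;
      }
      t0 -= borrow;
    }
  }

The stub additionally keeps each operand reversed in memory (see reverse()/reverse1() above, which reverse the word order and swap the 32-bit halves of every longword when converting between the caller's int-array layout and the 64-bit limbs used here).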
-+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) -+ Register src_pos, // source position (c_rarg1) -+ Register dst, // destination array oo (c_rarg2) -+ Register dst_pos, // destination position (c_rarg3) -+ Register length, -+ Register temp, -+ Label& L_failed) { -+ BLOCK_COMMENT("arraycopy_range_checks:"); ++ block_comment(" for (j = i; j; j--) {"); { ++ mv(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ assert_different_registers(t0, temp); ++ post1(); ++ addw(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // if [src_pos + length > arrayOop(src)->length()] then FAIL -+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, src_pos); -+ __ bgtu(temp, t0, L_failed); ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL -+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); -+ __ addw(temp, length, dst_pos); -+ __ bgtu(temp, t0, L_failed); ++ bind(loop); ++ pre2(Ri, Rlen); + -+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. -+ __ zero_extend(src_pos, src_pos, 32); -+ __ zero_extend(dst_pos, dst_pos, 32); ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ slliw(Rj, Rlen, 1); ++ subw(Rj, Rj, Ri); ++ subw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); + -+ BLOCK_COMMENT("arraycopy_range_checks done"); -+ } ++ post2(Ri, Rlen); ++ addw(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); ++ bind(end); ++ } ++ block_comment("} // i"); + -+ // -+ // Generate 'unsafe' array copy stub -+ // Though just as safe as the other stubs, it takes an unscaled -+ // size_t argument instead of an element count. -+ // -+ // Input: -+ // c_rarg0 - source array address -+ // c_rarg1 - destination array address -+ // c_rarg2 - byte count, treated as ssize_t, can be zero -+ // -+ // Examines the alignment of the operands and dispatches -+ // to a long, int, short, or byte copy loop. 
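The dispatch described above, implemented in the unsafe-copy stub that follows, simply ORs the source address, destination address and byte count together and tests the low bits of the result, so one test covers all three operands. A hedged C sketch of the same selection logic; the names are illustrative, not HotSpot's:

  #include <stddef.h>
  #include <stdint.h>

  typedef void (*copy_fn)(void *d, const void *s, size_t n);

  static copy_fn pick_copy_loop(const void *s, void *d, size_t byte_count,
                                copy_fn byte_fn, copy_fn short_fn,
                                copy_fn int_fn, copy_fn long_fn) {
    uintptr_t bits = (uintptr_t)s | (uintptr_t)d | (uintptr_t)byte_count;
    if ((bits & 7) == 0) return long_fn;   // everything 8-byte aligned
    if ((bits & 3) == 0) return int_fn;    // 4-byte aligned
    if ((bits & 1) == 0) return short_fn;  // 2-byte aligned
    return byte_fn;                        // fall back to a byte copy
  }

The stub then scales the byte count down to an element count (srli by LogBytesPerShort/Int/Long) before tail-calling the chosen copy routine.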
-+ // -+ address generate_unsafe_copy(const char* name, -+ address byte_copy_entry, -+ address short_copy_entry, -+ address int_copy_entry, -+ address long_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && long_copy_entry != NULL); -+ Label L_long_aligned, L_int_aligned, L_short_aligned; -+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ normalize(Rlen); + -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base + -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); + -+ __ orr(t0, s, d); -+ __ orr(t0, t0, count); ++ leave(); ++ bind(nothing); ++ ret(); + -+ __ andi(t0, t0, BytesPerLong - 1); -+ __ beqz(t0, L_long_aligned); -+ __ andi(t0, t0, BytesPerInt - 1); -+ __ beqz(t0, L_int_aligned); -+ __ andi(t0, t0, 1); -+ __ beqz(t0, L_short_aligned); -+ __ j(RuntimeAddress(byte_copy_entry)); ++ return entry; ++ } + -+ __ BIND(L_short_aligned); -+ __ srli(count, count, LogBytesPerShort); // size => short_count -+ __ j(RuntimeAddress(short_copy_entry)); -+ __ BIND(L_int_aligned); -+ __ srli(count, count, LogBytesPerInt); // size => int_count -+ __ j(RuntimeAddress(int_copy_entry)); -+ __ BIND(L_long_aligned); -+ __ srli(count, count, LogBytesPerLong); // size => long_count -+ __ j(RuntimeAddress(long_copy_entry)); ++ /** ++ * ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_square() { ++ Label argh; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ return start; -+ } ++ align(CodeEntryAlignment); ++ address entry = pc(); + -+ // -+ // Generate generic array copy stubs -+ // -+ // Input: -+ // c_rarg0 - src oop -+ // c_rarg1 - src_pos (32-bits) -+ // c_rarg2 - dst oop -+ // c_rarg3 - dst_pos (32-bits) -+ // c_rarg4 - element count (32-bits) -+ // -+ // Output: -+ // x10 == 0 - success -+ // x10 == -1^K - failure, where K is partial transfer count -+ // -+ address generate_generic_copy(const char* name, -+ address byte_copy_entry, address short_copy_entry, -+ address int_copy_entry, address oop_copy_entry, -+ address long_copy_entry, address checkcast_copy_entry) { -+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && -+ int_copy_entry != NULL && oop_copy_entry != NULL && -+ long_copy_entry != NULL && checkcast_copy_entry != NULL); -+ Label L_failed, L_failed_0, L_objArray; -+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ enter(); + -+ // Input registers -+ const Register src = c_rarg0; // source array oop -+ const Register src_pos = c_rarg1; // source position -+ const Register dst = c_rarg2; // destination array oop -+ const Register dst_pos = c_rarg3; // destination position -+ const Register length = c_rarg4; ++ // Make room. 
++ mv(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 + -+ __ align(CodeEntryAlignment); ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } + -+ StubCodeMark mark(this, "StubRoutines", name); ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); + -+ address start = __ pc(); ++ mv(Pm_base, Ra); + -+ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); + -+ // bump this on entry, not on exit: -+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bind(loop); ++ bge(Ri, Rlen, end); + -+ //----------------------------------------------------------------------- -+ // Assembler stub will be used for this call to arraycopy -+ // if the following conditions are met: -+ // -+ // (1) src and dst must not be null. -+ // (2) src_pos must not be negative. -+ // (3) dst_pos must not be negative. -+ // (4) length must not be negative. -+ // (5) src klass and dst klass should be the same and not NULL. -+ // (6) src and dst should be arrays. -+ // (7) src_pos + length must not exceed length of src. -+ // (8) dst_pos + length must not exceed length of dst. -+ // ++ pre1(Ri); + -+ // if [src == NULL] then return -1 -+ __ beqz(src, L_failed); ++ block_comment("for (j = (i+1)/2; j; j--) {"); { ++ addi(Rj, Ri, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + -+ // if [src_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, src_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ last_squaring(Ri); + -+ // if [dst == NULL] then return -1 -+ __ beqz(dst, L_failed); ++ block_comment(" for (j = i/2; j; j--) {"); { ++ srliw(Rj, Ri, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // if [dst_pos < 0] then return -1 -+ // i.e. sign bit set -+ __ andi(t0, dst_pos, 1UL << 31); -+ __ bnez(t0, L_failed); ++ post1_squaring(); ++ addi(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); + -+ // registers used as temp -+ const Register scratch_length = x28; // elements count to copy -+ const Register scratch_src_klass = x29; // array klass -+ const Register lh = x30; // layout helper ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // if [length < 0] then return -1 -+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) -+ // i.e. 
sign bit set -+ __ andi(t0, scratch_length, 1UL << 31); -+ __ bnez(t0, L_failed); ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ bind(loop); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + -+ __ load_klass(scratch_src_klass, src); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert klasses not null {"); -+ Label L1, L2; -+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL -+ __ bind(L1); -+ __ stop("broken null klass"); -+ __ bind(L2); -+ __ load_klass(t0, dst); -+ __ beqz(t0, L1); // this would be broken also -+ BLOCK_COMMENT("} assert klasses not null done"); -+ } -+#endif ++ pre2(Ri, Rlen); + -+ // Load layout helper (32-bits) -+ // -+ // |array_tag| | header_size | element_type | |log2_element_size| -+ // 32 30 24 16 8 2 0 -+ // -+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 -+ // ++ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ sub(Rj, Rj, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); + -+ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ last_squaring(Ri); + -+ // Handle objArrays completely differently... -+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); -+ __ lw(lh, Address(scratch_src_klass, lh_offset)); -+ __ mvw(t0, objArray_lh); -+ __ beq(lh, t0, L_objArray); ++ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); + -+ // if [src->klass() != dst->klass()] then return -1 -+ __ load_klass(t1, dst); -+ __ bne(t1, scratch_src_klass, L_failed); ++ post2(Ri, Rlen); ++ addi(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + -+ // if [src->is_Array() != NULL] then return -1 -+ // i.e. (lh >= 0) -+ __ andi(t0, lh, 1UL << 31); -+ __ beqz(t0, L_failed); ++ bind(end); ++ block_comment("} // i"); ++ } + -+ // At this point, it is known to be a typeArray (array_tag 0x3). 
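The checks above read Klass::layout_helper() once and then pick everything else apart from its bit fields. A small C sketch of the decoding, following the field diagram in the comment above; the literal shifts and masks here are illustrative, the VM uses the Klass::_lh_* constants:

  #include <stdint.h>

  static inline int lh_is_array(int32_t lh)          { return lh < 0; }            // array_tag sits in the top bits, so arrays have negative lh
  static inline int lh_header_size(int32_t lh)       { return (lh >> 16) & 0xff; } // bytes from the oop to element 0
  static inline int lh_log2_element_size(int32_t lh) { return lh & 0xff; }         // 0..3 for primitive arrays

  // element address = array base + lh_header_size(lh) + (index << lh_log2_element_size(lh))

This is the address arithmetic the TypeArrayKlass branch below performs with slli/srli on the layout helper and shadd on the positions.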
-+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert primitive array {"); -+ Label L; -+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); -+ __ bge(lh, t1, L); -+ __ stop("must be a primitive array"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert primitive array done"); -+ } -+#endif ++ normalize(Rlen); + -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base + -+ // TypeArrayKlass -+ // -+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) -+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) -+ // ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); + -+ const Register t0_offset = t0; // array offset -+ const Register x22_elsize = lh; // element size ++ leave(); ++ ret(); + -+ // Get array_header_in_bytes() -+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); -+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; -+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; -+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset ++ return entry; ++ } ++ }; ++#endif // COMPILER2 + -+ __ add(src, src, t0_offset); // src array offset -+ __ add(dst, dst, t0_offset); // dst array offset -+ BLOCK_COMMENT("choose copy loop based on element size"); ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. + -+ // next registers should be set before the jump to corresponding stub -+ const Register from = c_rarg0; // source array address -+ const Register to = c_rarg1; // destination array address -+ const Register count = c_rarg2; // elements count ++#undef __ ++#define __ masm-> + -+ // 'from', 'to', 'count' registers should be set in such order -+ // since they are the same as 'src', 'src_pos', 'dst'. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; + -+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ const int insts_size = 512; ++ const int locs_size = 64; + -+ // The possible values of elsize are 0-3, i.e. exact_log2(element -+ // size in bytes). We do a simple bitwise binary search. -+ __ BIND(L_copy_bytes); -+ __ andi(t0, x22_elsize, 2); -+ __ bnez(t0, L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_shorts); -+ __ add(from, src, src_pos); // src_addr -+ __ add(to, dst, dst_pos); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(byte_copy_entry)); ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); + -+ __ BIND(L_copy_shorts); -+ __ shadd(from, src_pos, src, t0, 1); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(short_copy_entry)); ++ address start = __ pc(); + -+ __ BIND(L_copy_ints); -+ __ andi(t0, x22_elsize, 1); -+ __ bnez(t0, L_copy_longs); -+ __ shadd(from, src_pos, src, t0, 2); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(int_copy_entry)); ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM + -+ __ BIND(L_copy_longs); -+#ifdef ASSERT -+ { -+ BLOCK_COMMENT("assert long copy {"); -+ Label L; -+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize -+ __ addw(lh, lh, zr); -+ __ mvw(t0, LogBytesPerLong); -+ __ beq(x22_elsize, t0, L); -+ __ stop("must be long copy, but elsize is wrong"); -+ __ bind(L); -+ BLOCK_COMMENT("} assert long copy done"); -+ } -+#endif -+ __ shadd(from, src_pos, src, t0, 3); // src_addr -+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr -+ __ addw(count, scratch_length, zr); // length -+ __ j(RuntimeAddress(long_copy_entry)); ++ __ enter(); // Save FP and RA before call + -+ // ObjArrayKlass -+ __ BIND(L_objArray); -+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); + -+ Label L_plain_copy, L_checkcast_copy; -+ // test array classes for subtyping -+ __ load_klass(t2, dst); -+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog + -+ // Identically typed arrays can be copied without element-wise checks. 
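For reference, the layout enum above counts 32-bit slots, so framesize is 4 (saved fp and return address, two 64-bit registers): the prolog reserves framesize << LogBytesPerInt = 16 bytes below fp, the is_even(framesize / 2) assert is the 16-byte stack-alignment check, and framesize >> (LogBytesPerWord - LogBytesPerInt) = 2 converts the slot count to the word count handed to RuntimeStub::new_runtime_stub() further down.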
-+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t1, L_failed); ++ int frame_complete = __ pc() - start; + -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, scratch_length, zr); // length -+ __ BIND(L_plain_copy); -+ __ j(RuntimeAddress(oop_copy_entry)); -+ -+ __ BIND(L_checkcast_copy); -+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) -+ { -+ // Before looking at dst.length, make sure dst is also an objArray. -+ __ lwu(t0, Address(t2, lh_offset)); -+ __ mvw(t1, objArray_lh); -+ __ bne(t0, t1, L_failed); -+ -+ // It is safe to examine both src.length and dst.length. -+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, -+ t2, L_failed); ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); + -+ __ load_klass(dst_klass, dst); // reload ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); + -+ // Marshal the base address arguments now, freeing registers. -+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); -+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); -+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); -+ __ addw(count, length, zr); // length (reloaded) -+ const Register sco_temp = c_rarg3; // this register is free now -+ assert_different_registers(from, to, count, sco_temp, -+ dst_klass, scratch_src_klass); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); + -+ // Generate the type check. -+ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ oop_maps->add_gc_map(the_pc - start, map); + -+ // Smashes t0, t1 -+ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); ++ __ reset_last_Java_frame(true); + -+ // Fetch destination element klass from the ObjArrayKlass header. -+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); -+ __ ld(dst_klass, Address(dst_klass, ek_offset)); -+ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ __ leave(); + -+ // the checkcast_copy loop needs two extra arguments: -+ assert(c_rarg3 == sco_temp, "#3 already in place"); -+ // Set up arguments for checkcast_copy_entry. 
-+ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass -+ __ j(RuntimeAddress(checkcast_copy_entry)); -+ } ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + -+ __ BIND(L_failed); -+ __ li(x10, -1); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); + -+ return start; ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); + } + -+ // -+ // Generate stub for array fill. If "aligned" is true, the -+ // "to" address is assumed to be heapword aligned. -+ // -+ // Arguments for generated stub: -+ // to: c_rarg0 -+ // value: c_rarg1 -+ // count: c_rarg2 treated as signed -+ // -+ address generate_fill(BasicType t, bool aligned, const char* name) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", name); -+ address start = __ pc(); ++ // Initialization ++ void generate_initial() { ++ // Generate initial stubs and initializes the entry points + -+ BLOCK_COMMENT("Entry:"); ++ // entry points that exist in all platforms Note: This is code ++ // that could be shared among different platforms - however the ++ // benefit seems to be smaller than the disadvantage of having a ++ // much more complicated generator structure. See also comment in ++ // stubRoutines.hpp. + -+ const Register to = c_rarg0; // source array address -+ const Register value = c_rarg1; // value -+ const Register count = c_rarg2; // elements count ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); + -+ const Register bz_base = x28; // base for block_zero routine -+ const Register cnt_words = x29; // temp register -+ const Register tmp_reg = t1; ++ StubRoutines::_call_stub_entry = ++ generate_call_stub(StubRoutines::_call_stub_return_address); + -+ __ enter(); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); + -+ Label L_fill_elements, L_exit1; ++ // Build this early so it's available for the interpreter. ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_StackOverflowError)); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. 
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ } + -+ int shift = -1; -+ switch (t) { -+ case T_BYTE: -+ shift = 0; ++ void generate_all() { ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++ StubRoutines::_throw_AbstractMethodError_entry = ++ generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_AbstractMethodError)); + -+ // Zero extend value -+ // 8 bit -> 16 bit -+ __ andi(value, value, 0xff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 8); -+ __ orr(value, value, tmp_reg); ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = ++ generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_IncompatibleClassChangeError)); + -+ // 16 bit -> 32 bit -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++ StubRoutines::_throw_NullPointerException_at_call_entry = ++ generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_NullPointerException_at_call)); ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); + -+ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_SHORT: -+ shift = 1; -+ // Zero extend value -+ // 16 bit -> 32 bit -+ __ andi(value, value, 0xffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 16); -+ __ orr(value, value, tmp_reg); ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ case T_INT: -+ shift = 2; ++ if (UseMultiplyToLenIntrinsic) { ++ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ } + -+ // Short arrays (< 8 bytes) fill by element -+ __ mv(tmp_reg, 8 >> shift); -+ __ bltu(count, tmp_reg, L_fill_elements); -+ break; -+ default: ShouldNotReachHere(); ++ if (UseSquareToLenIntrinsic) { ++ StubRoutines::_squareToLen = generate_squareToLen(); + } + -+ // Align source address at 8 bytes address boundary. -+ Label L_skip_align1, L_skip_align2, L_skip_align4; -+ if (!aligned) { -+ switch (t) { -+ case T_BYTE: -+ // One byte misalignment happens only for byte arrays. -+ __ andi(t0, to, 1); -+ __ beqz(t0, L_skip_align1); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ addiw(count, count, -1); -+ __ bind(L_skip_align1); -+ // Fallthrough -+ case T_SHORT: -+ // Two bytes misalignment happens only for byte and short (char) arrays. -+ __ andi(t0, to, 2); -+ __ beqz(t0, L_skip_align2); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ addiw(count, count, -(2 >> shift)); -+ __ bind(L_skip_align2); -+ // Fallthrough -+ case T_INT: -+ // Align to 8 bytes, we know we are 4 byte aligned to start. 
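The fill stub in the surrounding hunk widens the fill value to a full 64-bit pattern by repeated shift-and-or (8->16->32 in the switch above, 32->64 just before fill_words below). The same broadcast in plain C, for illustration; broadcast_byte is an illustrative name:

  #include <stdint.h>

  static inline uint64_t broadcast_byte(uint8_t v) {
    uint64_t x = v;
    x |= x << 8;    // 8 -> 16 bits
    x |= x << 16;   // 16 -> 32 bits
    x |= x << 32;   // 32 -> 64 bits
    return x;
  }

For T_SHORT and T_INT fills only the later widening steps are needed, which is why the switch starts the broadcast at different points.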
-+ __ andi(t0, to, 4); -+ __ beqz(t0, L_skip_align4); -+ __ sw(value, Address(to, 0)); -+ __ addi(to, to, 4); -+ __ addiw(count, count, -(4 >> shift)); -+ __ bind(L_skip_align4); -+ break; -+ default: ShouldNotReachHere(); -+ } ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + -+ // -+ // Fill large chunks -+ // -+ __ srliw(cnt_words, count, 3 - shift); // number of words ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); ++ StubRoutines::_montgomerySquare = g.generate_square(); ++ } ++#endif + -+ // 32 bit -> 64 bit -+ __ andi(value, value, 0xffffffff); -+ __ mv(tmp_reg, value); -+ __ slli(tmp_reg, tmp_reg, 32); -+ __ orr(value, value, tmp_reg); ++ generate_compare_long_strings(); + -+ __ slli(tmp_reg, cnt_words, 3 - shift); -+ __ subw(count, count, tmp_reg); -+ { -+ __ fill_words(to, cnt_words, value); -+ } ++ generate_string_indexof_stubs(); + -+ // Remaining count is less than 8 bytes. Fill it by a single store. -+ // Note that the total length is no less than 8 bytes. -+ if (t == T_BYTE || t == T_SHORT) { -+ __ beqz(count, L_exit1); -+ __ shadd(to, count, to, tmp_reg, shift); // points to the end -+ __ sd(value, Address(to, -8)); // overwrite some elements -+ __ bind(L_exit1); -+ __ leave(); -+ __ ret(); -+ } ++ StubRoutines::riscv::set_completed(); ++ } + -+ // Handle copies less than 8 bytes. -+ Label L_fill_2, L_fill_4, L_exit2; -+ __ bind(L_fill_elements); -+ switch (t) { -+ case T_BYTE: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_2); -+ __ sb(value, Address(to, 0)); -+ __ addi(to, to, 1); -+ __ bind(L_fill_2); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 4); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_SHORT: -+ __ andi(t0, count, 1); -+ __ beqz(t0, L_fill_4); -+ __ sh(value, Address(to, 0)); -+ __ addi(to, to, 2); -+ __ bind(L_fill_4); -+ __ andi(t0, count, 2); -+ __ beqz(t0, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ case T_INT: -+ __ beqz(count, L_exit2); -+ __ sw(value, Address(to, 0)); -+ break; -+ default: ShouldNotReachHere(); ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); + } -+ __ bind(L_exit2); -+ __ leave(); -+ __ ret(); -+ return start; + } + -+ void generate_arraycopy_stubs() { -+ address entry = NULL; -+ address entry_jbyte_arraycopy = NULL; -+ address entry_jshort_arraycopy = NULL; -+ address entry_jint_arraycopy = NULL; -+ address entry_oop_arraycopy = NULL; -+ address entry_jlong_arraycopy = NULL; -+ address entry_checkcast_arraycopy = NULL; ++ ~StubGenerator() {} ++}; // end class declaration + -+ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); -+ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +new file mode 100644 +index 0000000000..9202d9ec4b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/globalDefinitions.hpp" + -+ //*** jbyte -+ // Always need aligned and unaligned versions -+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, -+ "jbyte_disjoint_arraycopy"); -+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, -+ &entry_jbyte_arraycopy, -+ "jbyte_arraycopy"); -+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, -+ "arrayof_jbyte_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, -+ "arrayof_jbyte_arraycopy"); ++// Implementation of the platform-specific part of StubRoutines - for ++// a description of how to extend it, see the stubRoutines.hpp file. + -+ //*** jshort -+ // Always need aligned and unaligned versions -+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, -+ "jshort_disjoint_arraycopy"); -+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, -+ &entry_jshort_arraycopy, -+ "jshort_arraycopy"); -+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, -+ "arrayof_jshort_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, -+ "arrayof_jshort_arraycopy"); ++address StubRoutines::riscv::_get_previous_sp_entry = NULL; + -+ //*** jint -+ // Aligned versions -+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, -+ "arrayof_jint_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, -+ "arrayof_jint_arraycopy"); -+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. 
-+ // entry_jint_arraycopy always points to the unaligned version -+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, -+ "jint_disjoint_arraycopy"); -+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, -+ &entry_jint_arraycopy, -+ "jint_arraycopy"); ++address StubRoutines::riscv::_f2i_fixup = NULL; ++address StubRoutines::riscv::_f2l_fixup = NULL; ++address StubRoutines::riscv::_d2i_fixup = NULL; ++address StubRoutines::riscv::_d2l_fixup = NULL; ++address StubRoutines::riscv::_float_sign_mask = NULL; ++address StubRoutines::riscv::_float_sign_flip = NULL; ++address StubRoutines::riscv::_double_sign_mask = NULL; ++address StubRoutines::riscv::_double_sign_flip = NULL; ++address StubRoutines::riscv::_zero_blocks = NULL; ++address StubRoutines::riscv::_compare_long_string_LL = NULL; ++address StubRoutines::riscv::_compare_long_string_UU = NULL; ++address StubRoutines::riscv::_compare_long_string_LU = NULL; ++address StubRoutines::riscv::_compare_long_string_UL = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ll = NULL; ++address StubRoutines::riscv::_string_indexof_linear_uu = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ul = NULL; ++address StubRoutines::riscv::_large_byte_array_inflate = NULL; + -+ //*** jlong -+ // It is always aligned -+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, -+ "arrayof_jlong_disjoint_arraycopy"); -+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, -+ "arrayof_jlong_arraycopy"); -+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; -+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +new file mode 100644 +index 0000000000..0c9445e18a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -0,0 +1,155 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ //*** oops -+ { -+ // With compressed oops we need unaligned versions; notice that -+ // we overwrite entry_oop_arraycopy. 
-+ bool aligned = !UseCompressedOops; ++#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP ++#define CPU_RISCV_STUBROUTINES_RISCV_HPP + -+ StubRoutines::_arrayof_oop_disjoint_arraycopy -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", -+ /*dest_uninitialized*/false); -+ StubRoutines::_arrayof_oop_arraycopy -+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", -+ /*dest_uninitialized*/false); -+ // Aligned versions without pre-barriers -+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit -+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", -+ /*dest_uninitialized*/true); -+ StubRoutines::_arrayof_oop_arraycopy_uninit -+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", -+ /*dest_uninitialized*/true); -+ } ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. + -+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; -+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; -+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; -+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; ++static bool returns_to_call_stub(address return_pc) { ++ return return_pc == _call_stub_return_address; ++} + -+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); -+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, -+ /*dest_uninitialized*/true); ++enum platform_dependent_constants { ++ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) ++}; + ++class riscv { ++ friend class StubGenerator; + -+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_jlong_arraycopy); ++ private: ++ static address _get_previous_sp_entry; + -+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", -+ entry_jbyte_arraycopy, -+ entry_jshort_arraycopy, -+ entry_jint_arraycopy, -+ entry_oop_arraycopy, -+ entry_jlong_arraycopy, -+ entry_checkcast_arraycopy); ++ static address _f2i_fixup; ++ static address _f2l_fixup; ++ static address _d2i_fixup; ++ static address _d2l_fixup; + -+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); -+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); -+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); -+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); -+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); -+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ static address _float_sign_mask; ++ static address _float_sign_flip; ++ static address _double_sign_mask; ++ static address _double_sign_flip; ++ ++ static address _zero_blocks; ++ ++ static address _compare_long_string_LL; ++ static address _compare_long_string_LU; ++ static address _compare_long_string_UL; ++ static address _compare_long_string_UU; ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static 
address _string_indexof_linear_ul; ++ static address _large_byte_array_inflate; ++ ++ static bool _completed; ++ ++ public: ++ ++ static address get_previous_sp_entry() { ++ return _get_previous_sp_entry; + } + -+ // Safefetch stubs. -+ void generate_safefetch(const char* name, int size, address* entry, -+ address* fault_pc, address* continuation_pc) { -+ // safefetch signatures: -+ // int SafeFetch32(int* adr, int errValue) -+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) -+ // -+ // arguments: -+ // c_rarg0 = adr -+ // c_rarg1 = errValue -+ // -+ // result: -+ // PPC_RET = *adr or errValue -+ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); -+ StubCodeMark mark(this, "StubRoutines", name); ++ static address f2i_fixup() { ++ return _f2i_fixup; ++ } + -+ // Entry point, pc or function descriptor. -+ *entry = __ pc(); ++ static address f2l_fixup() { ++ return _f2l_fixup; ++ } + -+ // Load *adr into c_rarg1, may fault. -+ *fault_pc = __ pc(); -+ switch (size) { -+ case 4: -+ // int32_t -+ __ lw(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ case 8: -+ // int64_t -+ __ ld(c_rarg1, Address(c_rarg0, 0)); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++ static address d2i_fixup() { ++ return _d2i_fixup; ++ } + -+ // return errValue or *adr -+ *continuation_pc = __ pc(); -+ __ mv(x10, c_rarg1); -+ __ ret(); ++ static address d2l_fixup() { ++ return _d2l_fixup; + } + -+ // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; -+ __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, 8); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, 8); -+ __ bnez(tmp4, DIFF1); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, 8); -+ __ xorr(tmp4, tmp5, cnt1); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, 8); -+ __ bnez(tmp4, DIFF2); ++ static address float_sign_mask() { ++ return _float_sign_mask; + } + -+ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, -+ Label &DIFF2) { -+ const Register strU = x12, curU = x7, strL = x29, tmp = x30; -+ __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, 8); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, 8); -+ __ inflate_lo32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, curU, t0); -+ __ bnez(tmp, DIFF2); ++ static address float_sign_flip() { ++ return _float_sign_flip; ++ } + -+ __ ld(curU, Address(strU)); -+ __ addi(strU, strU, 8); -+ __ inflate_hi32(tmp, tmpL); -+ __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF1); ++ static address double_sign_mask() { ++ return _double_sign_mask; + } + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ address generate_compare_long_string_different_encoding(bool isLU) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, -+ DONE, CALCULATE_DIFFERENCE; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ static address double_sign_flip() { ++ return _double_sign_flip; ++ } + -+ // cnt2 == amount of characters left to compare -+ // Check already loaded first 4 symbols -+ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); -+ __ mv(isLU ? tmp1 : tmp2, tmp3); -+ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); -+ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. -+ __ push_reg(spilled_regs, sp); ++ static address zero_blocks() { ++ return _zero_blocks; ++ } + -+ if (isLU) { -+ __ add(str1, str1, cnt2); -+ __ shadd(str2, cnt2, str2, t0, 1); -+ } else { -+ __ shadd(str1, cnt2, str1, t0, 1); -+ __ add(str2, str2, cnt2); -+ } -+ __ xorr(tmp3, tmp1, tmp2); -+ __ mv(tmp5, tmp2); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++ static address compare_long_string_LL() { ++ return _compare_long_string_LL; ++ } + -+ Register strU = isLU ? str2 : str1, -+ strL = isLU ? str1 : str2, -+ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison -+ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison ++ static address compare_long_string_LU() { ++ return _compare_long_string_LU; ++ } + -+ __ sub(tmp2, strL, cnt2); // strL pointer to load from -+ __ slli(t0, cnt2, 1); -+ __ sub(cnt1, strU, t0); // strU pointer to load from ++ static address compare_long_string_UL() { ++ return _compare_long_string_UL; ++ } + -+ __ ld(tmp4, Address(cnt1)); -+ __ addi(cnt1, cnt1, 8); -+ __ beqz(cnt2, LOAD_LAST); // no characters left except last load -+ __ sub(cnt2, cnt2, 16); -+ __ bltz(cnt2, TAIL); -+ __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, 16); -+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); -+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, 16); -+ __ beqz(t0, LOAD_LAST); -+ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) -+ // Address of 8 bytes before last 4 characters in UTF-16 string -+ __ shadd(cnt1, cnt2, cnt1, t0, 1); -+ // Address of 16 bytes before last 4 characters in Latin1 string -+ __ add(tmp2, tmp2, cnt2); -+ __ ld(tmp4, Address(cnt1, -8)); -+ // last 16 characters before last load -+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); -+ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); -+ __ j(LOAD_LAST); -+ __ bind(DIFF2); -+ __ mv(tmpU, tmp4); -+ __ bind(DIFF1); -+ __ mv(tmpL, t0); -+ __ j(CALCULATE_DIFFERENCE); -+ __ bind(LOAD_LAST); -+ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. -+ // No need to load it again -+ __ mv(tmpU, tmp4); -+ __ ld(tmpL, Address(strL)); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ beqz(tmp3, DONE); ++ static address compare_long_string_UU() { ++ return _compare_long_string_UU; ++ } + -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ __ bind(CALCULATE_DIFFERENCE); -+ __ ctzc_bit(tmp4, tmp3); -+ __ srl(tmp1, tmp1, tmp4); -+ __ srl(tmp5, tmp5, tmp4); -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ sub(result, tmp1, tmp5); -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; + } + -+ address generate_method_entry_barrier() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); ++ static address string_indexof_linear_ll() { ++ return _string_indexof_linear_ll; ++ } + -+ Label deoptimize_label; ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } + -+ address start = __ pc(); ++ static address large_byte_array_inflate() { ++ return _large_byte_array_inflate; ++ } + -+ __ set_last_Java_frame(sp, fp, ra, t0); ++ static bool complete() { ++ return _completed; ++ } + -+ __ enter(); -+ __ add(t1, sp, wordSize); ++ static void set_completed() { ++ _completed = true; ++ } ++}; ++ ++#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +new file mode 100644 +index 0000000000..34c85e8145 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -0,0 +1,1833 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ __ sub(sp, sp, 4 * wordSize); ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/bytecodeTracer.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#include + -+ __ push_call_clobbered_registers(); ++#ifndef PRODUCT ++#include "oops/method.hpp" ++#endif // !PRODUCT + -+ __ mv(c_rarg0, t1); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); ++// Size of interpreter code. Increase if too small. Interpreter will ++// fail with a guarantee ("not enough space for interpreter generation"); ++// if too small. ++// Run with +PrintInterpreter to get the VM to print out the size. ++// Max size with JVMTI ++int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; + -+ __ reset_last_Java_frame(true); ++#define __ _masm-> + -+ __ mv(t0, x10); ++//----------------------------------------------------------------------------- + -+ __ pop_call_clobbered_registers(); ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); + -+ __ bnez(t0, deoptimize_label); ++ __ andi(esp, esp, -16); ++ __ mv(c_rarg3, esp); ++ // xmethod ++ // xlocals ++ // c_rarg3: first stack arg - wordSize ++ // adjust sp + -+ __ leave(); -+ __ ret(); ++ __ addi(sp, c_rarg3, -18 * wordSize); ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, 0)); + -+ __ BIND(deoptimize_label); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ xmethod, xlocals, c_rarg3); + -+ __ ld(t0, Address(sp, 0)); -+ __ ld(fp, Address(sp, wordSize)); -+ __ ld(ra, Address(sp, wordSize * 2)); -+ __ ld(t1, Address(sp, wordSize * 3)); ++ // x10: result handler + -+ __ mv(sp, t0); -+ __ jr(t1); ++ // Stack layout: ++ // sp: return address <- sp ++ // 1 garbage ++ // 8 integer args (if static first is unused) ++ // 1 float/double identifiers ++ // 8 double args ++ // stack args <- esp ++ // garbage ++ // expression stack bottom ++ // bcp (NULL) ++ // ... + -+ return start; -+ } ++ // Restore ra ++ __ ld(ra, Address(sp, 0)); ++ __ addi(sp, sp , 2 * wordSize); + -+ // x10 = result -+ // x11 = str1 -+ // x12 = cnt1 -+ // x13 = str2 -+ // x14 = cnt2 -+ // x28 = tmp1 -+ // x29 = tmp2 -+ // x30 = tmp3 -+ // x31 = tmp4 -+ address generate_compare_long_string_same_encoding(bool isLL) { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", isLL ? 
-+ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); -+ address entry = __ pc(); -+ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, -+ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; -+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, -+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; -+ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ // Do FP first so we can use c_rarg3 as temp ++ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers + -+ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used -+ // update cnt2 counter with already loaded 8 bytes -+ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); -+ // update pointers, because of previous read -+ __ add(str1, str1, wordSize); -+ __ add(str2, str2, wordSize); -+ // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 16 : 8); -+ __ push_reg(spilled_regs, sp); -+ __ bltz(cnt2, TAIL); -+ __ bind(SMALL_LOOP); -+ compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 16 : 8); -+ __ bgez(cnt2, SMALL_LOOP); -+ __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 16 : 8); -+ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? 8 : 4); -+ __ blez(cnt2, CHECK_LAST); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, 8); -+ __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, 8); -+ __ sub(cnt2, cnt2, isLL ? 8 : 4); -+ __ bind(CHECK_LAST); -+ if (!isLL) { -+ __ add(cnt2, cnt2, cnt2); // now in bytes -+ } -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ __ xorr(tmp4, tmp5, cnt1); -+ __ beqz(tmp4, LENGTH_DIFF); -+ // Find the first different characters in the longwords and -+ // compute their difference. 
-+ __ bind(DIFF2); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp5, tmp5, tmp3); -+ __ srl(cnt1, cnt1, tmp3); -+ if (isLL) { -+ __ andi(tmp5, tmp5, 0xFF); -+ __ andi(cnt1, cnt1, 0xFF); -+ } else { -+ __ andi(tmp5, tmp5, 0xFFFF); -+ __ andi(cnt1, cnt1, 0xFFFF); -+ } -+ __ sub(result, tmp5, cnt1); -+ __ j(LENGTH_DIFF); -+ __ bind(DIFF); -+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb -+ __ srl(tmp1, tmp1, tmp3); -+ __ srl(tmp2, tmp2, tmp3); -+ if (isLL) { -+ __ andi(tmp1, tmp1, 0xFF); -+ __ andi(tmp2, tmp2, 0xFF); -+ } else { -+ __ andi(tmp1, tmp1, 0xFFFF); -+ __ andi(tmp2, tmp2, 0xFFFF); -+ } -+ __ sub(result, tmp1, tmp2); -+ __ j(LENGTH_DIFF); -+ __ bind(LAST_CHECK_AND_LENGTH_DIFF); -+ __ xorr(tmp4, tmp1, tmp2); -+ __ bnez(tmp4, DIFF); -+ __ bind(LENGTH_DIFF); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; ++ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { ++ const FloatRegister r = g_FPArgReg[i]; ++ Label d, done; ++ ++ __ andi(t0, c_rarg3, 1UL << i); ++ __ bnez(t0, d); ++ __ flw(r, Address(sp, (10 + i) * wordSize)); ++ __ j(done); ++ __ bind(d); ++ __ fld(r, Address(sp, (10 + i) * wordSize)); ++ __ bind(done); + } + -+ void generate_compare_long_strings() { -+ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); -+ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); -+ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); ++ // c_rarg0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { ++ const Register rm = g_INTArgReg[i]; ++ __ ld(rm, Address(sp, i * wordSize)); + } + -+ // x10 result -+ // x11 src -+ // x12 src count -+ // x13 pattern -+ // x14 pattern count -+ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) -+ { -+ const char* stubName = needle_isL -+ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") -+ : "indexof_linear_uu"; -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", stubName); -+ address entry = __ pc(); ++ __ addi(sp, sp, 18 * wordSize); ++ __ ret(); + -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ bool isL = needle_isL && haystack_isL; -+ // parameters -+ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; -+ // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; -+ // redefinitions -+ Register ch1 = x28, ch2 = x29; -+ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); ++ return entry; ++} + -+ __ push_reg(spilled_regs, sp); ++// Various method entries ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ // xmethod: Method* ++ // x30: sender sp ++ // esp: args + -+ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, -+ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, -+ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, -+ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, -+ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, -+ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ if (!InlineIntrinsics) { ++ return NULL; // Generate a vanilla entry ++ } + -+ __ ld(ch1, Address(needle)); -+ __ ld(ch2, Address(haystack)); -+ // src.length - pattern.length -+ __ sub(haystack_len, haystack_len, needle_len); ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. + -+ // first is needle[0] -+ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ __ mv(mask1, haystack_isL ? mask0101 : mask0001); -+ __ mul(first, first, mask1); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); -+ if (needle_isL != haystack_isL) { -+ __ mv(tmp, ch1); -+ } -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); -+ __ blez(haystack_len, L_SMALL); ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- esp ++ // [ arg ] ++ // retaddr in ra + -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); -+ } -+ // xorr, sub, orr, notr, andr -+ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] -+ // eg: -+ // first: aa aa aa aa aa aa aa aa -+ // ch2: aa aa li nx jd ka aa aa -+ // match_mask: 80 80 00 00 00 00 80 80 -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ address fn = NULL; ++ address entry_point = NULL; ++ Register continuation = ra; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fabs_d(f10, f10); ++ __ mv(sp, x30); // Restore caller's SP ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fsqrt_d(f10, f10); ++ __ mv(sp, x30); ++ break; ++ case Interpreter::java_lang_math_sin : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_cos : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_tan : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log10 : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first 
callee-saved register ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ mv(x9, ra); ++ continuation = x9; ++ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp)); ++ __ mv(sp, x30); ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f12, Address(esp)); ++ __ fmadd_d(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ flw(f11, Address(esp, Interpreter::stackElementSize)); ++ __ flw(f12, Address(esp)); ++ __ fmadd_s(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point != NULL) { ++ __ jr(continuation); ++ } + -+ // search first char of needle, if success, goto L_HAS_ZERO; -+ __ bnez(match_mask, L_HAS_ZERO); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ bltz(haystack_len, L_POST_LOOP); ++ return entry_point; ++} + -+ __ bind(L_LOOP); -+ __ ld(ch2, Address(haystack)); -+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); -+ __ bnez(match_mask, L_HAS_ZERO); ++// Abstract method entry ++// Attempt to execute abstract method. 
Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ // xmethod: Method* ++ // x30: sender SP + -+ __ bind(L_LOOP_PROCEED); -+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); -+ __ add(haystack, haystack, wordSize); -+ __ add(result, result, wordSize / haystack_chr_size); -+ __ bgez(haystack_len, L_LOOP); ++ address entry_point = __ pc(); + -+ __ bind(L_POST_LOOP); -+ __ mv(ch2, -wordSize / haystack_chr_size); -+ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check -+ __ ld(ch2, Address(haystack)); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zeros, -1); // all bits set -+ __ j(L_SMALL_PROCEED); ++ // abstract method entry + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL); -+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); -+ __ neg(haystack_len, haystack_len); -+ if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); -+ } -+ __ xorr(ch2, first, ch2); -+ __ sub(match_mask, ch2, mask1); -+ __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zeros, -1); // all bits set ++ // pop return address, reset last_sp to NULL ++ __ empty_expression_stack(); ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + -+ __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. -+ __ notr(ch2, ch2); -+ __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check -+ __ beqz(match_mask, NOMATCH); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), ++ xmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); + -+ __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); -+ __ mv(ch2, wordSize / haystack_chr_size); -+ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zeros, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ return entry_point; ++} + -+ __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); -+ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); -+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zeros, trailing_zeros, 1); -+ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); -+ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); + -+ __ bind(L_SMALL_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); -+ __ add(result, result, 1); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_SMALL_HAS_ZERO_LOOP); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ __ mv(t1, sp); ++ // maximal sp for current fp (stack grows negative) ++ // check if frame is complete ++ __ bge(t0, t1, L); ++ __ stop ("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); -+ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} + -+ __ align(OptoLoopAlignment); -+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); -+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); -+ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); -+ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) -+ __ sub(result, result, 1); // array index from 0, so result -= 1 ++ // convention: expect aberrant index in register x11 ++ __ zero_extend(c_rarg2, x11, 32); ++ // convention: expect array in register x13 ++ __ mv(c_rarg1, x13); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ArrayIndexOutOfBoundsException), ++ c_rarg1, c_rarg2); ++ return entry; ++} + -+ __ bind(L_HAS_ZERO_LOOP); -+ __ mv(needle_len, wordSize / haystack_chr_size); -+ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); -+ // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ mv(trailing_zeros, wordSize / haystack_chr_size); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); + -+ // compare one char -+ __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); -+ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); -+ haystack_isL ? 
__ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zeros, trailing_zeros, 1); // next char index -+ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); -+ __ beq(needle_len, ch2, L_CMP_LOOP); ++ // object is at TOS ++ __ pop_reg(c_rarg1); + -+ __ bind(L_CMP_LOOP_NOMATCH); -+ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); -+ __ add(haystack, haystack, haystack_chr_size); -+ __ j(L_HAS_ZERO_LOOP); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP); -+ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ClassCastException), ++ c_rarg1); ++ return entry; ++} + -+ __ align(OptoLoopAlignment); -+ __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); -+ __ add(result, result, 1); -+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); -+ __ j(DONE); ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ if (pass_oop) { ++ // object is at TOS ++ __ pop_reg(c_rarg2); ++ } ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ la(c_rarg1, Address((address)name)); ++ if (pass_oop) { ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ create_klass_exception), ++ c_rarg1, c_rarg2); ++ } else { ++ // kind of lame ExternalAddress can't take NULL because ++ // external_word_Relocation will assert. ++ if (message != NULL) { ++ __ la(c_rarg2, Address((address)message)); ++ } else { ++ __ mv(c_rarg2, NULL_WORD); ++ } ++ __ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), ++ c_rarg1, c_rarg2); ++ } ++ // throw exception ++ __ j(address(Interpreter::throw_exception_entry())); ++ return entry; ++} + -+ __ align(OptoLoopAlignment); -+ __ bind(L_HAS_ZERO_LOOP_NOMATCH); -+ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until -+ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, -+ // so, result was increased at max by wordSize/str2_chr_size - 1, so, -+ // respective high bit wasn't changed. L_LOOP_PROCEED will increase -+ // result by analyzed characters value, so, we can just reset lower bits -+ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL -+ // 2) restore needle_len and haystack_len values from "compressed" haystack_len -+ // 3) advance haystack value to represent next haystack octet. result & 7/3 is -+ // index of last analyzed substring inside current octet. So, haystack in at -+ // respective start address. We need to advance it to next octet -+ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); -+ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); -+ __ andi(result, result, haystack_isL ? 
-8 : -4); -+ __ slli(tmp, match_mask, haystack_chr_shift); -+ __ sub(haystack, haystack, tmp); -+ __ addw(haystack_len, haystack_len, zr); -+ __ j(L_LOOP_PROCEED); ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); + -+ __ align(OptoLoopAlignment); -+ __ bind(NOMATCH); -+ __ mv(result, -1); ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that esp is now tos until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); + -+ __ bind(DONE); -+ __ pop_reg(spilled_regs, sp); -+ __ ret(); -+ return entry; ++ if (state == atos) { ++ Register obj = x10; ++ Register mdp = x11; ++ Register tmp = x12; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ profile_return_type(mdp, obj, tmp); + } + -+ void generate_string_indexof_stubs() -+ { -+ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); -+ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); -+ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); -+ } ++ // Pop N words from the stack ++ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); ++ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); + -+#ifdef COMPILER2 -+ address generate_mulAdd() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ __ shadd(esp, x11, esp, t0, 3); + -+ address entry = __ pc(); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, ++ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ const Register out = x10; -+ const Register in = x11; -+ const Register offset = x12; -+ const Register len = x13; -+ const Register k = x14; -+ const Register tmp = x28; ++ __ check_and_handle_popframe(xthread); ++ __ check_and_handle_earlyret(xthread); + -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp); -+ __ leave(); -+ __ ret(); ++ __ get_dispatch(); ++ __ dispatch_next(state, step); + -+ return entry; -+ } ++ return entry; ++} + -+ /** -+ * Arguments: -+ * -+ * Input: -+ * c_rarg0 - x address -+ * c_rarg1 - x length -+ * c_rarg2 - y address -+ * c_rarg3 - y length -+ * c_rarg4 - z address -+ * c_rarg5 - z length -+ */ -+ address generate_multiplyToLen() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); -+ address entry = __ pc(); ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register y = x12; -+ const Register ylen = x13; -+ const Register z = x14; -+ const Register zlen = x15; ++ // Calculate stack limit ++ 
__ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++ // Restore expression stack pointer ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // NULL last_sp until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); + -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); ++ // handle exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ return entry; ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); + } ++ return entry; ++} + -+ address generate_squareToLen() -+ { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address entry = __ pc(); ++address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ address entry = __ pc(); ++ if (type == T_OBJECT) { ++ // retrieve result from frame ++ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // and verify it ++ __ verify_oop(x10); ++ } else { ++ __ cast_primitive_type(type, x10); ++ } + -+ const Register x = x10; -+ const Register xlen = x11; -+ const Register z = x12; -+ const Register zlen = x13; -+ const Register y = x14; // == x -+ const Register ylen = x15; // == xlen ++ __ ret(); // return from result handler ++ return entry; ++} + -+ const Register tmp1 = x16; -+ const Register tmp2 = x17; -+ const Register tmp3 = x7; -+ const Register tmp4 = x28; -+ const Register tmp5 = x29; -+ const Register tmp6 = x30; -+ const Register tmp7 = x31; ++address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, ++ address runtime_entry) { ++ assert_cond(runtime_entry != NULL); ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ membar(MacroAssembler::AnyAny); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} + -+ BLOCK_COMMENT("Entry:"); -+ __ enter(); -+ __ mv(y, x); -+ __ mv(ylen, xlen); -+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ leave(); -+ __ ret(); ++// Helpers for commoning out cases in the various type of method entries. 
++// + -+ return entry; -+ } + -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerLeftShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -+ address entry = __ pc(); ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// xmethod: method ++// ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); + -+ Label loop, exit; ++ __ get_method_counters(xmethod, t1, done); + -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); + -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = t1; ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); + -+ __ beqz(numIter, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters + -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call + -+ __ bind(loop); -+ __ addi(oldArrNext, oldArr, 4); -+ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -+ __ vle32_v(v0, oldArr); -+ __ vle32_v(v4, oldArrNext); -+ __ vsll_vx(v0, v0, shiftCount); -+ __ vsrl_vx(v4, v4, 
shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArr); -+ __ sub(numIter, numIter, t0); -+ __ shadd(oldArr, t0, oldArr, t1, 2); -+ __ shadd(newArr, t0, newArr, t1, 2); -+ __ bnez(numIter, loop); ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); + -+ __ bind(exit); -+ __ ret(); ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } + -+ return entry; ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } ++} + -+ // Arguments: -+ // -+ // Input: -+ // c_rarg0 - newArr address -+ // c_rarg1 - oldArr address -+ // c_rarg2 - newIdx -+ // c_rarg3 - shiftCount -+ // c_rarg4 - numIter -+ // -+ address generate_bigIntegerRightShift() { -+ __ align(CodeEntryAlignment); -+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -+ address entry = __ pc(); ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ __ mv(c_rarg1, zr); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); ++ __ j(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead ++// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError ++// without going through the signal handler, i.e., reserved and yellow zones ++// will not be made usable. The shadow zone must suffice to handle the ++// overflow. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// x13: number of additional locals this frame needs (what we must check) ++// xmethod: Method* ++// ++// Kills: ++// x10 ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ ++ // monitor entry size: see picture of stack set ++ // (generate_method_entry) and frame_amd64.hpp ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ Label loop, exit; ++ // total overhead size: entry_size + (saved fp through expr stack ++ // bottom). be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + -+ Register newArr = c_rarg0; -+ Register oldArr = c_rarg1; -+ Register newIdx = c_rarg2; -+ Register shiftCount = c_rarg3; -+ Register numIter = c_rarg4; -+ Register idx = numIter; ++ const int page_size = os::vm_page_size(); + -+ Register shiftRevCount = c_rarg5; -+ Register oldArrNext = c_rarg6; -+ Register newArrCur = t0; -+ Register oldArrCur = t1; ++ Label after_frame_check; + -+ __ beqz(idx, exit); -+ __ shadd(newArr, newIdx, newArr, t0, 2); ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. 
++ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bleu(x13, t0, after_frame_check); + -+ __ li(shiftRevCount, 32); -+ __ sub(shiftRevCount, shiftRevCount, shiftCount); ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone + -+ __ bind(loop); -+ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -+ __ sub(idx, idx, t0); -+ __ shadd(oldArrNext, idx, oldArr, t1, 2); -+ __ shadd(newArrCur, idx, newArr, t1, 2); -+ __ addi(oldArrCur, oldArrNext, 4); -+ __ vle32_v(v0, oldArrCur); -+ __ vle32_v(v4, oldArrNext); -+ __ vsrl_vx(v0, v0, shiftCount); -+ __ vsll_vx(v4, v4, shiftRevCount); -+ __ vor_vv(v0, v0, v4); -+ __ vse32_v(v0, newArrCur); -+ __ bnez(idx, loop); ++ // locals + overhead, in bytes ++ __ mv(x10, overhead_size); ++ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. + -+ __ bind(exit); -+ __ ret(); ++ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); ++ __ ld(t0, stack_limit); + -+ return entry; -+ } ++#ifdef ASSERT ++ Label limit_okay; ++ // Verify that thread stack limit is non-zero. ++ __ bnez(t0, limit_okay); ++ __ stop("stack overflow limit is zero"); ++ __ bind(limit_okay); +#endif + -+#ifdef COMPILER2 -+ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ // Add stack limit to locals. ++ __ add(x10, x10, t0); + -+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, -+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; ++ // Check against the current stack bottom. ++ __ bgtu(sp, x10, after_frame_check); + -+ RegSet _toSave; -+ bool _squaring; ++ // Remove the incoming args, peeling the machine SP back to where it ++ // was in the caller. This is not strictly necessary, but unless we ++ // do so the stack frame may have a garbage FP; this ensures a ++ // correct call stack that we can always unwind. The ANDI should be ++ // unnecessary because the sender SP in x30 is always aligned, but ++ // it doesn't hurt. ++ __ andi(sp, x30, -16); + -+ public: -+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) -+ : MacroAssembler(as->code()), _squaring(squaring) { ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + -+ // Register allocation ++ // all done with frame size check ++ __ bind(after_frame_check); ++} + -+ Register reg = c_rarg0; -+ Pa_base = reg; // Argument registers -+ if (squaring) { -+ Pb_base = Pa_base; -+ } else { -+ Pb_base = ++reg; -+ } -+ Pn_base = ++reg; -+ Rlen= ++reg; -+ inv = ++reg; -+ Pm_base = ++reg; ++// Allocate monitor and lock method (asm interpreter) ++// ++// Args: ++// xmethod: Method* ++// xlocals: locals ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) ++// t0, t1 (temporary regs) ++void TemplateInterpreterGenerator::lock_method() { ++ // synchronize method ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // Working registers: -+ Ra = ++reg; // The current digit of a, b, n, and m. 
-+ Rb = ++reg; -+ Rm = ++reg; -+ Rn = ++reg; ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); ++#endif // ASSERT + -+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. -+ Pb = ++reg; -+ Pm = ++reg; -+ Pn = ++reg; ++ // get synchronization object ++ { ++ Label done; ++ __ lwu(x10, access_flags); ++ __ andi(t0, x10, JVM_ACC_STATIC); ++ // get receiver (assume this is frequent case) ++ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); ++ __ beqz(t0, done); ++ __ load_mirror(x10, xmethod); + -+ tmp0 = ++reg; // Three registers which form a -+ tmp1 = ++reg; // triple-precision accumuator. -+ tmp2 = ++reg; ++#ifdef ASSERT ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("synchronization object is NULL"); ++ __ bind(L); ++ } ++#endif // ASSERT + -+ Ri = x6; // Inner and outer loop indexes. -+ Rj = x7; ++ __ bind(done); ++ } + -+ Rhi_ab = x28; // Product registers: low and high parts -+ Rlo_ab = x29; // of a*b and m*n. -+ Rhi_mn = x30; -+ Rlo_mn = x31; ++ // add space for monitor & lock ++ __ add(sp, sp, - entry_size); // add space for a monitor entry ++ __ add(esp, esp, - entry_size); ++ __ mv(t0, esp); ++ __ sd(t0, monitor_block_top); // set new monitor block top ++ // store object ++ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); ++ __ mv(c_rarg1, esp); // object address ++ __ lock_object(c_rarg1); ++} + -+ // x18 and up are callee-saved. -+ _toSave = RegSet::range(x18, reg) + Pm_base; -+ } ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++// ++// Args: ++// ra: return address ++// xmethod: Method* ++// xlocals: pointer to locals ++// xcpool: cp cache ++// stack_pointer: previous sp ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ // initialize fixed part of activation frame ++ if (native_call) { ++ __ add(esp, sp, - 14 * wordSize); ++ __ mv(xbcp, zr); ++ __ add(sp, sp, - 14 * wordSize); ++ // add 2 zero-initialized slots for native calls ++ __ sd(zr, Address(sp, 13 * wordSize)); ++ __ sd(zr, Address(sp, 12 * wordSize)); ++ } else { ++ __ add(esp, sp, - 12 * wordSize); ++ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod ++ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ add(sp, sp, - 12 * wordSize); ++ } ++ __ sd(xbcp, Address(sp, wordSize)); ++ __ sd(esp, Address(sp, 0)); + -+ private: -+ void save_regs() { -+ push_reg(_toSave, sp); -+ } ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(t0, Address(xmethod, Method::method_data_offset())); ++ __ beqz(t0, method_data_continue); ++ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); ++ __ bind(method_data_continue); ++ } + -+ void restore_regs() { -+ pop_reg(_toSave, sp); -+ } ++ __ sd(xmethod, Address(sp, 7 * wordSize)); ++ __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); + -+ template -+ void unroll_2(Register count, T block) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(t0, count, 0x1); -+ bnez(t0, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(); -+ bind(odd); -+ (this->*block)(); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ // Get mirror and store it in the frame as GC root for this Method* ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + -+ template -+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { -+ Label loop, end, odd; -+ beqz(count, end); -+ andi(tmp, count, 0x1); -+ bnez(tmp, odd); -+ align(16); -+ bind(loop); -+ (this->*block)(d, s, tmp); -+ bind(odd); -+ (this->*block)(d, s, tmp); -+ addi(count, count, -2); -+ bgtz(count, loop); -+ bind(end); -+ } ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); ++ __ sd(xcpool, Address(sp, 3 * wordSize)); ++ __ sd(xlocals, Address(sp, 2 * wordSize)); + -+ void pre1(RegisterOrConstant i) { -+ block_comment("pre1"); -+ // Pa = Pa_base; -+ // Pb = Pb_base + i; -+ // Pm = Pm_base; -+ // Pn = Pn_base + i; -+ // Ra = *Pa; -+ // Rb = *Pb; -+ // Rm = *Pm; -+ // Rn = *Pn; -+ if (i.is_register()) { -+ slli(t0, i.as_register(), LogBytesPerWord); -+ } else { -+ mv(t0, i.as_constant()); -+ slli(t0, t0, LogBytesPerWord); -+ } ++ __ sd(ra, Address(sp, 11 * wordSize)); ++ __ sd(fp, Address(sp, 10 * wordSize)); ++ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp + -+ mv(Pa, Pa_base); -+ add(Pb, Pb_base, t0); -+ mv(Pm, Pm_base); -+ add(Pn, Pn_base, t0); ++ // set sender sp ++ // leave last_sp as null ++ __ sd(x30, Address(sp, 9 * wordSize)); ++ __ sd(zr, Address(sp, 8 * wordSize)); + -+ ld(Ra, Address(Pa)); -+ ld(Rb, Address(Pb)); -+ ld(Rm, Address(Pm)); -+ ld(Rn, Address(Pn)); ++ // Move SP out of the way ++ if (!native_call) { ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ slli(t0, t0, 3); ++ __ sub(t0, sp, t0); ++ __ andi(sp, t0, -16); ++ } ++} + -+ // Zero the m*n result. -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++// End of helpers + -+ // The core multiply-accumulate step of a Montgomery -+ // multiplication. The idea is to schedule operations as a -+ // pipeline so that instructions with long latencies (loads and -+ // multiplies) have time to complete before their results are -+ // used. This most benefits in-order implementations of the -+ // architecture but out-of-order ones also benefit. -+ void step() { -+ block_comment("step"); -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ addi(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ addi(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the -+ // previous iteration. 
-+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// + -+ void post1() { -+ block_comment("post1"); ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // xmethod: Method* ++ // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ // ra is live. It must be saved around calls. + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ address entry = __ pc(); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); -+ stop("broken Montgomery multiply"); -+ bind(ok); -+ } -+#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and an cad we just set -+ // the carry flag iff tmp0 is nonzero. 
-+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++ Label slow_path; ++ const Register local_0 = c_rarg0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(local_0, Address(esp, 0)); ++ __ beqz(local_0, slow_path); + -+ void pre2(Register i, Register len) { -+ block_comment("pre2"); -+ // Pa = Pa_base + i-len; -+ // Pb = Pb_base + len; -+ // Pm = Pm_base + i-len; -+ // Pn = Pn_base + len; ++ __ mv(x9, x30); // Move senderSP to a callee-saved register + -+ sub(Rj, i, len); -+ // Rj == i-len ++ // Load the value of the referent field. ++ const Address field_address(local_0, referent_offset); ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); + -+ // Ra as temp register -+ slli(Ra, Rj, LogBytesPerWord); -+ add(Pa, Pa_base, Ra); -+ add(Pm, Pm_base, Ra); -+ slli(Ra, len, LogBytesPerWord); -+ add(Pb, Pb_base, Ra); -+ add(Pn, Pn_base, Ra); ++ // areturn ++ __ andi(sp, x9, -16); // done with stack ++ __ ret(); + -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ add(Pa, Pa, wordSize); -+ ld(Ra, Address(Pa)); -+ add(Pb, Pb, -wordSize); -+ ld(Rb, Address(Pb)); -+ add(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ add(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ return entry; ++} + -+ mv(Rhi_mn, zr); -+ mv(Rlo_mn, zr); -+ } ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ // TODO: Unimplemented generate_CRC32_update_entry ++ return 0; ++} + -+ void post2(Register i, Register len) { -+ block_comment("post2"); -+ sub(Rj, i, len); ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32_updateBytes_entry ++ return 0; ++} + -+ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ // TODO: Unimplemented generate_CRC32C_updateBytes_entry ++ return 0; ++} + -+ // As soon as we know the least significant digit of our result, -+ // store it. -+ // Pm_base[i-len] = tmp0; -+ // Rj as temp register -+ slli(Rj, Rj, LogBytesPerWord); -+ add(Rj, Pm_base, Rj); -+ sd(tmp0, Address(Rj)); ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Bang each page in the shadow zone. 
We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; ++ const int page_size = os::vm_page_size(); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); ++ } ++} + -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+ // A carry in tmp0 after Montgomery multiplication means that we -+ // should subtract multiples of n from our result in m. We'll -+ // keep doing that until there is no carry. -+ void normalize(Register len) { -+ block_comment("normalize"); -+ // while (tmp0) -+ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); -+ Label loop, post, again; -+ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now -+ beqz(tmp0, post); { -+ bind(again); { -+ mv(i, zr); -+ mv(cnt, len); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ li(t0, 1); // set carry flag, i.e. no borrow -+ align(16); -+ bind(loop); { -+ notr(Rn, Rn); -+ add(Rm, Rm, t0); -+ add(Rm, Rm, Rn); -+ sltu(t0, Rm, Rn); -+ slli(Rn, i, LogBytesPerWord); // Rn as temp register -+ add(Rn, Pm_base, Rn); -+ sd(Rm, Address(Rn)); -+ add(i, i, 1); -+ slli(Rn, i, LogBytesPerWord); -+ add(Rm, Pm_base, Rn); -+ ld(Rm, Address(Rm)); -+ add(Rn, Pn_base, Rn); -+ ld(Rn, Address(Rn)); -+ sub(cnt, cnt, 1); -+ } bnez(cnt, loop); -+ addi(tmp0, tmp0, -1); -+ add(tmp0, tmp0, t0); -+ } bnez(tmp0, again); -+ } bind(post); -+ } ++ // x11: Method* ++ // x30: sender sp + -+ // Move memory at s to d, reversing words. 
-+ // Increments d to end of copied memory -+ // Destroys tmp1, tmp2 -+ // Preserves len -+ // Leaves s pointing to the address which was in d at start -+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { -+ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); ++ address entry_point = __ pc(); + -+ slli(tmp1, len, LogBytesPerWord); -+ add(s, s, tmp1); -+ mv(tmp1, len); -+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); -+ slli(tmp1, len, LogBytesPerWord); -+ sub(s, d, tmp1); -+ } -+ // [63...0] -> [31...0][63...32] -+ void reverse1(Register d, Register s, Register tmp) { -+ addi(s, s, -wordSize); -+ ld(tmp, Address(s)); -+ ror_imm(tmp, tmp, 32, t0); -+ sd(tmp, Address(d)); -+ addi(d, d, wordSize); -+ } ++ const Address constMethod (xmethod, Method::const_offset()); ++ const Address access_flags (xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x12, ConstMethod:: ++ size_of_parameters_offset()); + -+ void step_squaring() { -+ // An extra ACC -+ step(); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } ++ // get parameter size (always needed) ++ __ ld(x12, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+ void last_squaring(Register i) { -+ Label dont; -+ // if ((i & 1) == 0) { -+ andi(t0, i, 0x1); -+ bnez(t0, dont); { -+ // MACC(Ra, Rb, tmp0, tmp1, tmp2); -+ // Ra = *++Pa; -+ // Rb = *--Pb; -+ mulhu(Rhi_ab, Ra, Rb); -+ mul(Rlo_ab, Ra, Rb); -+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); -+ } bind(dont); -+ } ++ // Native calls don't need the stack size check since they have no ++ // expression stack and the arguments are already on the stack and ++ // we only add a handful of words to the stack. + -+ void extra_step_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ // xmethod: Method* ++ // x12: size of parameters ++ // x30: sender sp + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // Rm = *++Pm; -+ // Rn = *--Pn; -+ mulhu(Rhi_mn, Rm, Rn); -+ mul(Rlo_mn, Rm, Rn); -+ addi(Pm, Pm, wordSize); -+ ld(Rm, Address(Pm)); -+ addi(Pn, Pn, -wordSize); -+ ld(Rn, Address(Pn)); -+ } ++ // for natives the size of locals is zero + -+ void post1_squaring() { -+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, xlocals, 3); ++ __ addi(xlocals, xlocals, -wordSize); + -+ // *Pm = Rm = tmp0 * inv; -+ mul(Rm, tmp0, inv); -+ sd(Rm, Address(Pm)); ++ // Pull SP back to minimum size: this avoids holes in the stack ++ __ andi(sp, esp, -16); + -+ // MACC(Rm, Rn, tmp0, tmp1, tmp2); -+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; -+ mulhu(Rhi_mn, Rm, Rn); ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); + -+#ifndef PRODUCT -+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); -+ { -+ mul(Rlo_mn, Rm, Rn); -+ add(Rlo_mn, tmp0, Rlo_mn); -+ Label ok; -+ beqz(Rlo_mn, ok); { -+ stop("broken Montgomery multiply"); -+ } bind(ok); -+ } ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); +#endif -+ // We have very carefully set things up so that -+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate -+ // the lower half of Rm * Rn because we know the result already: -+ // it must be -tmp0. 
tmp0 + (-tmp0) must generate a carry iff -+ // tmp0 != 0. So, rather than do a mul and a cad we just set -+ // the carry flag iff tmp0 is nonzero. -+ // -+ // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp, Rlo_mn); -+ addi(t0, tmp0, -1); -+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero -+ cadc(tmp0, tmp1, Rhi_mn, t0); -+ adc(tmp1, tmp2, zr, t0); -+ mv(tmp2, zr); -+ } -+ -+ // use t0 as carry -+ void acc(Register Rhi, Register Rlo, -+ Register tmp0, Register tmp1, Register tmp2) { -+ cad(tmp0, tmp0, Rlo, t0); -+ cadc(tmp1, tmp1, Rhi, t0); -+ adc(tmp2, tmp2, zr, t0); -+ } -+ -+ public: -+ /** -+ * Fast Montgomery multiplication. The derivation of the -+ * algorithm is in A Cryptographic Library for the Motorola -+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. -+ * -+ * Arguments: -+ * -+ * Inputs for multiplication: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements b -+ * c_rarg2 - int array elements n (the modulus) -+ * c_rarg3 - int length -+ * c_rarg4 - int inv -+ * c_rarg5 - int array elements m (the result) -+ * -+ * Inputs for squaring: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_multiply() { -+ Label argh, nothing; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); + -+ align(CodeEntryAlignment); -+ address entry = pc(); ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. + -+ beqz(Rlen, nothing); ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+ enter(); ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } + -+ // Make room. -+ li(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ bang_stack_shadow_pages(true); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ if (!_squaring) -+ reverse(Ra, Pb_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. 
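++  // lock_method() (shared with the normal entry below) allocates a
++  // BasicObjectLock in the frame and locks the receiver, or the class
++  // mirror for static methods.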
++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif ++ } + -+#ifndef PRODUCT -+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); -+ { -+ ld(Rn, Address(Pn_base)); -+ mul(Rlo_mn, Rn, inv); -+ li(t0, -1); -+ Label ok; -+ beq(Rlo_mn, t0, ok); -+ stop("broken inverse in Montgomery multiply"); -+ bind(ok); -+ } ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); +#endif + -+ mv(Pm_base, Ra); ++ // jvmti support ++ __ notify_method_entry(); + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ // work registers ++ const Register t = x18; ++ const Register result_handler = x19; + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bge(Ri, Rlen, end); ++ // allocate space for parameters ++ __ ld(t, Address(xmethod, Method::const_offset())); ++ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + -+ bind(loop); -+ pre1(Ri); ++ __ slli(t, t, Interpreter::logStackElementSize); ++ __ sub(x30, esp, t); ++ __ andi(sp, x30, -16); ++ __ mv(esp, x30); + -+ block_comment(" for (j = i; j; j--) {"); { -+ mv(Rj, Ri); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); ++ // get signature handler ++ { ++ Label L; ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bnez(t, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bind(L); ++ } + -+ post1(); -+ addw(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); -+ bind(end); -+ block_comment("} // i"); -+ } ++ // call signature handler ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, ++ "adjust this code"); + -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ slli(t0, Rlen, 1); -+ bge(Ri, t0, end); ++ // The generated handlers do not touch xmethod (the method). ++ // However, large signatures cannot be cached and are generated ++ // each time here. The slow-path generator can do a GC on return, ++ // so we must reload it after the call. 
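++  // jalr with a single register operand is an indirect call: it jumps to
++  // the signature handler in t and leaves the return address in ra. The
++  // handler copies the Java arguments from xlocals into the native ABI
++  // locations (argument registers and outgoing stack slots).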
++ __ jalr(t); ++ __ get_method(xmethod); // slow path can do a GC, reload xmethod + -+ bind(loop); -+ pre2(Ri, Rlen); + -+ block_comment(" for (j = len*2-i-1; j; j--) {"); { -+ slliw(Rj, Rlen, 1); -+ subw(Rj, Rj, Ri); -+ subw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); -+ } block_comment(" } // j"); ++ // result handler is in x10 ++ // set result handler ++ __ mv(result_handler, x10); ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_STATIC); ++ __ beqz(t0, L); ++ // get mirror ++ __ load_mirror(t, xmethod); ++ // copy mirror into activation frame ++ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // pass handle to mirror ++ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ bind(L); ++ } + -+ post2(Ri, Rlen); -+ addw(Ri, Ri, 1); -+ slli(t0, Rlen, 1); -+ blt(Ri, t0, loop); -+ bind(end); -+ } -+ block_comment("} // i"); ++ // get native function entry point in x28 ++ { ++ Label L; ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ mv(t1, unsatisfied); ++ __ ld(t1, t1); ++ __ bne(x28, t1, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ get_method(xmethod); ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ __ bind(L); ++ } + -+ normalize(Rlen); ++ // pass JNIEnv ++ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ // It is enough that the pc() points into the right code ++ // segment. It does not have to be the correct return pc. ++ Label native_return; ++ __ set_last_Java_frame(esp, fp, native_return, x30); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); ++ __ addi(t0, zr, (u1)_thread_in_Java); ++ __ beq(t, t0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif + -+ leave(); -+ bind(nothing); -+ ret(); ++ // Change state to native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); + -+ return entry; -+ } ++ // Call the native method. ++ __ jalr(x28); ++ __ bind(native_return); ++ __ get_method(xmethod); ++ // result potentially in x10 or f10 + -+ /** -+ * -+ * Arguments: -+ * -+ * Inputs: -+ * c_rarg0 - int array elements a -+ * c_rarg1 - int array elements n (the modulus) -+ * c_rarg2 - int length -+ * c_rarg3 - int inv -+ * c_rarg4 - int array elements m (the result) -+ * -+ */ -+ address generate_square() { -+ Label argh; -+ bind(argh); -+ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ // make room for the pushes we're about to do ++ __ sub(t0, esp, 4 * wordSize); ++ __ andi(sp, t0, -16); + -+ align(CodeEntryAlignment); -+ address entry = pc(); ++ // NOTE: The order of these pushes is known to frame::interpreter_frame_result ++ // in order to extract the result of a method call. If the order of these ++ // pushes change or anything else is added to the stack then the code in ++ // interpreter_frame_result must also change. 
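++  // The native result still lives only in x10 (integral/oop) or f10
++  // (float/double); pushing dtos then ltos spills both register views into
++  // the frame at the offsets frame::interpreter_frame_result expects.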
++ __ push(dtos); ++ __ push(ltos); + -+ enter(); ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+ // Make room. -+ li(Ra, 512); -+ bgt(Rlen, Ra, argh); -+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); -+ sub(Ra, sp, Ra); -+ andi(sp, Ra, -2 * wordSize); ++ __ mv(t0, _thread_in_native_trans); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + -+ { -+ // Copy input args, reversing as we go. We use Ra as a -+ // temporary variable. -+ reverse(Ra, Pa_base, Rlen, Ri, Rj); -+ reverse(Ra, Pn_base, Rlen, Ri, Rj); -+ } ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label L, Continue; ++ __ safepoint_poll_acquire(L); ++ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ beqz(t1, Continue); ++ __ bind(L); + -+ // Push all call-saved registers and also Pm_base which we'll need -+ // at the end. -+ save_regs(); ++ // Don't use call_VM as it will see a possible pending exception ++ // and forward it and never return here preventing us from ++ // clearing _last_native_pc down below. So we do a runtime call by ++ // hand. ++ // ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); ++ __ jalr(t1); ++ __ get_method(xmethod); ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } + -+ mv(Pm_base, Ra); ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + -+ mv(tmp0, zr); -+ mv(tmp1, zr); -+ mv(tmp2, zr); ++ __ mv(t0, _thread_in_Java); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ block_comment("for (int i = 0; i < len; i++) {"); -+ mv(Ri, zr); { -+ Label loop, end; -+ bind(loop); -+ bge(Ri, Rlen, end); ++ // reset_last_Java_frame ++ __ reset_last_Java_frame(true); + -+ pre1(Ri); ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } + -+ block_comment("for (j = (i+1)/2; j; j--) {"); { -+ addi(Rj, Ri, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ // reset handle block ++ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + -+ last_squaring(Ri); ++ // If result is an oop unbox and store it in frame where gc will see it ++ // and result handler will pick it up + -+ block_comment(" for (j = i/2; j; j--) {"); { -+ srliw(Rj, Ri, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ { ++ Label no_oop; ++ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); ++ __ bne(t, result_handler, no_oop); ++ // Unbox oop result, e.g. JNIHandles::resolve result. 
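++  // resolve_jobject turns the JNI handle returned by the native method back
++  // into a raw oop (handling NULL and the weak-handle tag) before it is
++  // stored where the GC and the result handler can see it.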
++ __ pop(ltos); ++ __ resolve_jobject(x10, xthread, t); ++ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } + -+ post1_squaring(); -+ addi(Ri, Ri, 1); -+ blt(Ri, Rlen, loop); ++ { ++ Label no_reguard; ++ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t0, t1, no_reguard); + -+ bind(end); -+ block_comment("} // i"); -+ } ++ __ pusha(); // only save smashed registers ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ __ jalr(t1); ++ __ popa(); // only restore smashed registers ++ __ bind(no_reguard); ++ } + -+ block_comment("for (int i = len; i < 2*len; i++) {"); -+ mv(Ri, Rlen); { -+ Label loop, end; -+ bind(loop); -+ slli(t0, Rlen, 1); -+ bge(Ri, t0, end); ++ // The method register is junk from after the thread_in_native transition ++ // until here. Also can't call_VM until the bcp has been ++ // restored. Need bcp for throwing exception below so get it now. ++ __ get_method(xmethod); + -+ pre2(Ri, Rlen); ++ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> ++ // xbcp == code_base() ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // Note: At some point we may want to unify this with the code ++ // used in call_VM_base(); i.e., we should use the ++ // StubRoutines::forward_exception code. For now this doesn't work ++ // here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } + -+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ sub(Rj, Rj, 1); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); -+ } block_comment(" } // j"); ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); ++ __ beqz(t0, L); ++ // the code below should be shared with interpreter macro ++ // assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object ++ // has not been unlocked by an explicit monitorexit bytecode. 
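++  // The monitor in question is the one lock_method() allocated on entry;
++  // it sits immediately below interpreter_frame_initial_sp, which is the
++  // address formed for c_rarg1 below.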
+ -+ last_squaring(Ri); ++ // monitor expect in c_rarg1 for slow unlock path ++ __ la(c_rarg1, Address(fp, // address of first monitor ++ (intptr_t)(frame::interpreter_frame_initial_sp_offset * ++ wordSize - sizeof(BasicObjectLock)))); + -+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { -+ slli(Rj, Rlen, 1); -+ sub(Rj, Rj, Ri); -+ srliw(Rj, Rj, 1); -+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); -+ } block_comment(" } // j"); ++ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t, unlock); + -+ post2(Ri, Rlen); -+ addi(Ri, Ri, 1); -+ slli(t0, Rlen, 1); -+ blt(Ri, t0, loop); ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); + -+ bind(end); -+ block_comment("} // i"); -+ } ++ __ bind(unlock); ++ __ unlock_object(c_rarg1); ++ } ++ __ bind(L); ++ } + -+ normalize(Rlen); ++ // jvmti support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + -+ mv(Ra, Pm_base); // Save Pm_base in Ra -+ restore_regs(); // Restore caller's Pm_base ++ __ pop(ltos); ++ __ pop(dtos); + -+ // Copy our result into caller's Pm_base -+ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ __ jalr(result_handler); + -+ leave(); -+ ret(); ++ // remove activation ++ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp ++ // remove frame anchor ++ __ leave(); + -+ return entry; -+ } -+ }; -+#endif // COMPILER2 ++ // restore sender sp ++ __ mv(sp, esp); + -+ // Continuation point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. ++ __ ret(); + -+#undef __ -+#define __ masm-> ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } + -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; ++ return entry_point; ++} + -+ const int insts_size = 512; -+ const int locs_size = 64; ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); ++ // determine code generation flags ++ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + -+ address start = __ pc(); ++ // t0: sender sp ++ address entry_point = __ pc(); + -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM ++ const Address constMethod(xmethod, Method::const_offset()); ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x13, ++ ConstMethod::size_of_parameters_offset()); ++ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); + -+ __ enter(); // Save FP and RA before call ++ // get parameter size (always needed) ++ // need to load the const method first ++ __ ld(x13, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); + -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ // x12: size of parameters + -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words ++ __ sub(x13, x13, x12); // x13 = no. of additional locals + -+ int frame_complete = __ pc() - start; ++ // see if we've got enough room on the stack for locals plus overhead. ++ generate_stack_overflow_check(); + -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, t1, 3); ++ __ add(xlocals, xlocals, -wordSize); + -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); ++ // Make room for additional locals ++ __ slli(t1, x13, 3); ++ __ sub(t0, esp, t1); + -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); ++ // Padding between locals and fixed part of activation frame to ensure ++ // SP is always 16-byte aligned. 
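++  // andi with -16 clears the low four bits, rounding the candidate SP down
++  // to the 16-byte boundary required by the RISC-V calling convention.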
++ __ andi(sp, t0, -16); + -+ oop_maps->add_gc_map(the_pc - start, map); ++ // x13 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ blez(x13, exit); // do nothing if x13 <= 0 ++ __ bind(loop); ++ __ sd(zr, Address(t0)); ++ __ add(t0, t0, wordSize); ++ __ add(x13, x13, -1); // until everything initialized ++ __ bnez(x13, loop); ++ __ bind(exit); ++ } + -+ __ reset_last_Java_frame(true); ++ // And the base dispatch table ++ __ get_dispatch(); + -+ __ leave(); ++ // initialize fixed part of activation frame ++ generate_fixed_frame(false); + -+ // check for pending exceptions ++ // make sure method is not native & not abstract +#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif + -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); -+ } ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. + -+ // Initialization -+ void generate_initial() { -+ // Generate initial stubs and initializes the entry points ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); + -+ // entry points that exist in all platforms Note: This is code -+ // that could be shared among different platforms - however the -+ // benefit seems to be smaller than the disadvantage of having a -+ // much more complicated generator structure. See also comment in -+ // stubRoutines.hpp. ++ Label no_mdp; ++ const Register mdp = x13; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ beqz(mdp, no_mdp); ++ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); ++ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers ++ __ bind(no_mdp); + -+ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } + -+ StubRoutines::_call_stub_entry = -+ generate_call_stub(StubRoutines::_call_stub_return_address); ++ Label continue_after_compile; ++ __ bind(continue_after_compile); + -+ // is referenced by megamorphic call -+ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ bang_stack_shadow_pages(false); + -+ // Build this early so it's available for the interpreter. 
-+ StubRoutines::_throw_StackOverflowError_entry = -+ generate_throw_exception("StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_StackOverflowError)); -+ StubRoutines::_throw_delayed_StackOverflowError_entry = -+ generate_throw_exception("delayed StackOverflowError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime::throw_delayed_StackOverflowError)); -+ // Safefetch stubs. -+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif + } + -+ void generate_all() { -+ // support for verify_oop (must happen after universe_init) -+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); -+ StubRoutines::_throw_AbstractMethodError_entry = -+ generate_throw_exception("AbstractMethodError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_AbstractMethodError)); ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif + -+ StubRoutines::_throw_IncompatibleClassChangeError_entry = -+ generate_throw_exception("IncompatibleClassChangeError throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_IncompatibleClassChangeError)); ++ // jvmti support ++ __ notify_method_entry(); + -+ StubRoutines::_throw_NullPointerException_at_call_entry = -+ generate_throw_exception("NullPointerException at call throw_exception", -+ CAST_FROM_FN_PTR(address, -+ SharedRuntime:: -+ throw_NullPointerException_at_call)); -+ // arraycopy stubs used by compilers -+ generate_arraycopy_stubs(); ++ __ dispatch_next(vtos); + -+#ifdef COMPILER2 -+ if (UseMulAddIntrinsic) { -+ StubRoutines::_mulAdd = generate_mulAdd(); ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); + } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } + -+ if (UseMultiplyToLenIntrinsic) { -+ StubRoutines::_multiplyToLen = generate_multiplyToLen(); -+ } ++ return entry_point; ++} + -+ if (UseSquareToLenIntrinsic) { -+ StubRoutines::_squareToLen = generate_squareToLen(); -+ } ++//----------------------------------------------------------------------------- ++// Exceptions + -+ if (UseMontgomeryMultiplyIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); -+ StubRoutines::_montgomeryMultiply = g.generate_multiply(); -+ } 
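++// generate_throw_exception() below publishes several interpreter entry
++// points: Interpreter::_rethrow_exception_entry, _throw_exception_entry,
++// the JVMTI PopFrame entry (_remove_activation_preserving_args_entry) and
++// _remove_activation_entry.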
++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // x10: exception ++ // x13: return address/pc that threw exception ++ __ restore_bcp(); // xbcp points to call/send ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ reinit_heapbase(); // restore xheapbase as heapbase. ++ __ get_dispatch(); + -+ if (UseMontgomerySquareIntrinsic) { -+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); -+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); -+ StubRoutines::_montgomerySquare = g.generate_square(); -+ } ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // If we came here via a NullPointerException on the receiver of a ++ // method, xthread may be corrupt. ++ __ get_method(xmethod); ++ // expression stack is undefined here ++ // x10: exception ++ // xbcp: exception bcp ++ __ verify_oop(x10); ++ __ mv(c_rarg1, x10); + -+ if (UseRVVForBigIntegerShiftIntrinsics) { -+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -+ } -+#endif ++ // expression stack must be empty before entering the VM in case of ++ // an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ call_VM(x13, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::exception_handler_for_exception), ++ c_rarg1); + -+ generate_compare_long_strings(); ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); + -+ generate_string_indexof_stubs(); ++ // x10: exception handler entry point ++ // x13: preserved exception oop ++ // xbcp: bcp for exception handler ++ __ push_ptr(x13); // push exception which is now the only value on the stack ++ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) + -+ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -+ if (bs_nm != NULL) { -+ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -+ } ++ // If the exception is not handled in the current frame the frame is ++ // removed and the exception is rethrown (i.e. exception ++ // continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction ++ // which caused the exception and the expression stack is ++ // empty. Thus, for any VM calls at this point, GC will find a legal ++ // oop map (with empty expression stack). 
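++  // (The "Calculate stack limit" arithmetic above, repeated later as
++  // "Restore machine SP", amounts to:
++  //    sp = (interpreter_frame_initial_sp -
++  //          (max_stack + interpreter_frame_monitor_size() + 4) * wordSize) & ~0xf;
++  // that is, reserve the expression stack plus some headroom below the
++  // frame's initial SP, then round down to a 16-byte boundary.)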
+ -+ StubRoutines::riscv::set_completed(); ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition ++ // indicating that we are currently handling popframe, so that ++ // call_VMs that may happen later do not trigger new popframe ++ // handling cycles. ++ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ __ ori(x13, x13, JavaThread::popframe_processing_bit); ++ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); ++ __ bnez(x10, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to ++ // deoptimized caller ++ __ get_method(x10); ++ __ ld(x10, Address(x10, Method::const_offset())); ++ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: ++ size_of_parameters_offset()))); ++ __ slli(x10, x10, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub(xlocals, xlocals, x10); ++ __ add(xlocals, xlocals, wordSize); ++ // Save these arguments ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ Deoptimization:: ++ popframe_preserve_args), ++ xthread, x10, xlocals); ++ ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Inform deoptimization that it is responsible for restoring ++ // these arguments ++ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); ++ ++ // Continue in deoptimization handler ++ __ ret(); ++ ++ __ bind(caller_not_deoptimized); + } + -+ public: -+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { -+ if (all) { -+ generate_all(); -+ } else { -+ generate_initial(); -+ } ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Restore the last_sp and null it out ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); ++ ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); + } + -+ ~StubGenerator() {} -+}; // end class declaration ++ // Clear the popframe condition flag ++ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); ++ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + -+#define UCM_TABLE_MAX_ENTRIES 8 -+void StubGenerator_generate(CodeBuffer* code, bool all) { -+ if (UnsafeCopyMemory::_table == NULL) { -+ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(t0, Address(xbcp, 0)); ++ __ mv(t1, Bytecodes::_invokestatic); ++ __ bne(t1, t0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. ++ ++ __ ld(c_rarg0, Address(xlocals, 0)); ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); ++ ++ __ beqz(x10, L_done); ++ ++ __ sd(x10, Address(esp, 0)); ++ __ bind(L_done); + } ++#endif // INCLUDE_JVMTI + -+ StubGenerator g(code, all); ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slliw(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop_ptr(x10); ++ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, false, true, false); ++ // restore exception ++ __ get_vm_result(x10, xthread); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects the ++ // following registers set up: ++ // ++ // x10: exception ++ // ra: return address/pc that threw exception ++ // sp: expression stack of caller ++ // fp: fp of caller ++ // FIXME: There's no point saving ra here because VM calls don't trash it ++ __ sub(sp, sp, 2 * wordSize); ++ __ sd(x10, Address(sp, 0)); // save exception ++ __ sd(ra, Address(sp, wordSize)); // save return address ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, ra); ++ __ mv(x11, x10); // save exception handler ++ __ ld(x10, Address(sp, 0)); // restore exception ++ __ ld(ra, Address(sp, wordSize)); // restore return address ++ __ add(sp, sp, 2 * wordSize); ++ // We might be returning to a deopt handler that expects x13 to ++ // contain the exception pc ++ __ mv(x13, ra); ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(x11); // jump to exception ++ // handler of caller +} -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); ++ ++ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ Address 
cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); ++ ++ // Clear the earlyret state ++ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); ++ __ sd(zr, cond_addr); ++ ++ __ remove_activation(state, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ ret(); ++ ++ return entry; ++} ++// end of ForceEarlyReturn support ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ aep = __ pc(); __ push_ptr(); __ j(L); ++ fep = __ pc(); __ push_f(); __ j(L); ++ dep = __ pc(); __ push_d(); __ j(L); ++ lep = __ pc(); __ push_l(); __ j(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push_i(); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ __ push_reg(ra); ++ __ push(state); ++ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ mv(c_rarg2, x10); // Pass itos ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); ++ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ pop(state); ++ __ pop_reg(ra); ++ __ ret(); // return from result handler ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ push_reg(t0); ++ __ push_reg(x10); ++ __ mv(x10, (address) &BytecodeCounter::_counter_value); ++ __ mv(t0, 1); ++ __ amoadd_d(zr, x10, t0, Assembler::aqrl); ++ __ pop_reg(x10); ++ __ pop_reg(t0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); ++ __ jal(Interpreter::trace_code(t->tos_in())); ++ __ reinit_heapbase(); ++} ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ push_reg(t0); ++ __ mv(t0, (address) &BytecodeCounter::_counter_value); ++ __ ld(t0, Address(t0)); ++ __ mv(t1, StopInterpreterAt); ++ __ bne(t0, t1, L); ++ __ ebreak(); ++ __ bind(L); ++ __ pop_reg(t0); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 00000000000..395a2d338e4 +index 0000000000..c22fd3bfcd --- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,58 @@ ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -0,0 +1,4000 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
@@ -49728,5971 +46986,5533 @@ index 00000000000..395a2d338e4 + */ + +#include "precompiled.hpp" -+#include "runtime/deoptimization.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "utilities/globalDefinitions.hpp" ++#include "runtime/synchronizer.hpp" + -+// Implementation of the platform-specific part of StubRoutines - for -+// a description of how to extend it, see the stubRoutines.hpp file. ++#define __ _masm-> + -+address StubRoutines::riscv::_get_previous_sp_entry = NULL; ++// Platform-dependent initialization + -+address StubRoutines::riscv::_f2i_fixup = NULL; -+address StubRoutines::riscv::_f2l_fixup = NULL; -+address StubRoutines::riscv::_d2i_fixup = NULL; -+address StubRoutines::riscv::_d2l_fixup = NULL; -+address StubRoutines::riscv::_float_sign_mask = NULL; -+address StubRoutines::riscv::_float_sign_flip = NULL; -+address StubRoutines::riscv::_double_sign_mask = NULL; -+address StubRoutines::riscv::_double_sign_flip = NULL; -+address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_compare_long_string_LL = NULL; -+address StubRoutines::riscv::_compare_long_string_UU = NULL; -+address StubRoutines::riscv::_compare_long_string_LU = NULL; -+address StubRoutines::riscv::_compare_long_string_UL = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ll = NULL; -+address StubRoutines::riscv::_string_indexof_linear_uu = NULL; -+address StubRoutines::riscv::_string_indexof_linear_ul = NULL; -+address StubRoutines::riscv::_large_byte_array_inflate = NULL; -+address StubRoutines::riscv::_method_entry_barrier = NULL; ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} + -+bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -new file mode 100644 -index 00000000000..51f07819c33 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,161 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++// Address computation: local variables + -+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP -+#define CPU_RISCV_STUBROUTINES_RISCV_HPP ++static inline Address iaddress(int n) { ++ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); ++} + -+// This file holds the platform specific parts of the StubRoutines -+// definition. See stubRoutines.hpp for a description on how to -+// extend it. ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} + -+static bool returns_to_call_stub(address return_pc) { -+ return return_pc == _call_stub_return_address; ++static inline Address faddress(int n) { ++ return iaddress(n); +} + -+enum platform_dependent_constants { -+ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) -+}; ++static inline Address daddress(int n) { ++ return laddress(n); ++} + -+class riscv { -+ friend class StubGenerator; ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} + -+ private: -+ static address _get_previous_sp_entry; ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, 0); ++} + -+ static address _f2i_fixup; -+ static address _f2l_fixup; -+ static address _d2i_fixup; -+ static address _d2l_fixup; ++static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, Interpreter::local_offset_in_bytes(1));; ++} + -+ static address _float_sign_mask; -+ static address _float_sign_flip; -+ static address _double_sign_mask; -+ static address _double_sign_flip; ++static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ static address _zero_blocks; ++static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return laddress(r, temp, _masm); ++} + -+ static address _compare_long_string_LL; -+ static address _compare_long_string_LU; -+ static address _compare_long_string_UL; -+ static address _compare_long_string_UU; -+ static address _string_indexof_linear_ll; -+ static address _string_indexof_linear_uu; -+ static address _string_indexof_linear_ul; -+ static address _large_byte_array_inflate; ++static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} + -+ static address _method_entry_barrier; ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} + -+ static bool _completed; ++// At top of Java expression stack which may be different than esp(). It ++// isn't for category 1 objects. 
++static inline Address at_tos () { ++ return Address(esp, Interpreter::expr_offset_in_bytes(0)); ++} + -+ public: ++static inline Address at_tos_p1() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++} + -+ static address get_previous_sp_entry() { -+ return _get_previous_sp_entry; -+ } ++static inline Address at_tos_p2() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(2)); ++} + -+ static address f2i_fixup() { -+ return _f2i_fixup; -+ } ++static inline Address at_tos_p3() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(3)); ++} + -+ static address f2l_fixup() { -+ return _f2l_fixup; -+ } ++static inline Address at_tos_p4() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(4)); ++} + -+ static address d2i_fixup() { -+ return _d2i_fixup; -+ } ++static inline Address at_tos_p5() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++} + -+ static address d2l_fixup() { -+ return _d2l_fixup; -+ } ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the Address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators) { ++ assert(val == noreg || val == x10, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, x29, x11, decorators); ++} + -+ static address float_sign_mask() { -+ return _float_sign_mask; -+ } ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators) { ++ __ load_heap_oop(dst, src, x7, x11, decorators); ++} + -+ static address float_sign_flip() { -+ return _float_sign_flip; -+ } ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(xbcp, offset); ++} + -+ static address double_sign_mask() { -+ return _double_sign_mask; -+ } ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) ++{ ++ if (!RewriteBytecodes) { return; } ++ Label L_patch_done; + -+ static address double_sign_flip() { -+ return _double_sign_flip; ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: // fall through ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_dputfield: // fall through ++ case Bytecodes::_fast_fputfield: // fall through ++ case Bytecodes::_fast_iputfield: // fall through ++ case Bytecodes::_fast_lputfield: // fall through ++ case Bytecodes::_fast_sputfield: { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); ++ __ mv(bc_reg, bc); ++ __ beqz(temp_reg, L_patch_done); ++ break; ++ } ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ mv(bc_reg, bc); ++ } + } + -+ static address zero_blocks() { -+ return _zero_blocks; ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. ++ __ bnez(temp_reg, L_fast_patch); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); ++ __ j(L_patch_done); ++ __ bind(L_fast_patch); + } + -+ static address compare_long_string_LL() { -+ return _compare_long_string_LL; -+ } ++#ifdef ASSERT ++ Label L_okay; ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ beq(temp_reg, bc_reg, L_okay); ++ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); ++ __ beqz(temp_reg, L_okay); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif + -+ static address compare_long_string_LU() { -+ return _compare_long_string_LU; -+ } ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} + -+ static address compare_long_string_UL() { -+ return _compare_long_string_UL; -+ } ++// Individual instructions + -+ static address compare_long_string_UU() { -+ return _compare_long_string_UU; -+ } ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} + -+ static address string_indexof_linear_ul() { -+ return _string_indexof_linear_ul; -+ } ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("should not reach here bytecode"); ++} + -+ static address string_indexof_linear_ll() { -+ return _string_indexof_linear_ll; -+ } ++void TemplateTable::aconst_null() ++{ ++ transition(vtos, atos); ++ __ mv(x10, zr); ++} + -+ static address string_indexof_linear_uu() { -+ return _string_indexof_linear_uu; -+ } ++void TemplateTable::iconst(int value) ++{ ++ transition(vtos, itos); ++ __ mv(x10, value); ++} + -+ static address large_byte_array_inflate() { -+ return _large_byte_array_inflate; -+ } ++void TemplateTable::lconst(int value) ++{ ++ transition(vtos, ltos); ++ __ mv(x10, value); ++} + -+ static address method_entry_barrier() { -+ return _method_entry_barrier; ++void TemplateTable::fconst(int value) ++{ ++ transition(vtos, ftos); ++ static float fBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)fBuf); ++ switch (value) { ++ case 0: ++ __ fmv_w_x(f10, zr); ++ break; ++ case 1: ++ __ flw(f10, t0, 0); ++ break; ++ case 2: ++ __ flw(f10, t0, sizeof(float)); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ static bool complete() { -+ return _completed; ++void TemplateTable::dconst(int value) ++{ ++ transition(vtos, dtos); ++ static double dBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)dBuf); ++ switch (value) { ++ case 0: ++ __ fmv_d_x(f10, zr); ++ break; ++ case 1: ++ __ fld(f10, t0, 0); ++ break; ++ case 2: ++ __ fld(f10, t0, sizeof(double)); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ static void set_completed() { -+ _completed = true; -+ } -+}; ++void TemplateTable::bipush() ++{ ++ transition(vtos, itos); ++ __ load_signed_byte(x10, at_bcp(1)); ++} + -+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -new file mode 100644 -index 00000000000..6537b2dbd94 ---- /dev/null -+++ 
b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1794 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void TemplateTable::sipush() ++{ ++ transition(vtos, itos); ++ __ load_unsigned_short(x10, at_bcp(1)); ++ __ revb_w_w(x10, x10); ++ __ sraiw(x10, x10, 16); ++} + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "interpreter/bytecodeHistogram.hpp" -+#include "interpreter/bytecodeTracer.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "interpreter/templateInterpreterGenerator.hpp" -+#include "interpreter/templateTable.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/arrayOop.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/jvmtiThreadState.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/jniHandles.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/synchronizer.hpp" -+#include "runtime/timer.hpp" -+#include "runtime/vframeArray.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/powerOfTwo.hpp" -+#include ++void TemplateTable::ldc(bool wide) ++{ ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; + -+#ifndef PRODUCT -+#include "oops/method.hpp" -+#endif // !PRODUCT ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(x11, 1); ++ } else { ++ __ load_unsigned_byte(x11, at_bcp(1)); ++ } ++ __ get_cpool_and_tags(x12, x10); + -+// Size of interpreter code. Increase if too small. Interpreter will -+// fail with a guarantee ("not enough space for interpreter generation"); -+// if too small. -+// Run with +PrintInterpreter to get the VM to print out the size. 
-+// Max size with JVMTI -+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); + -+#define __ _masm-> ++ // get type ++ __ addi(x13, x11, tags_offset); ++ __ add(x13, x10, x13); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x13, Address(x13, 0)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + -+//----------------------------------------------------------------------------- ++ // unresolved class - get the resolved class ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); ++ __ beq(x13, t1, call_ldc); + -+address TemplateInterpreterGenerator::generate_slow_signature_handler() { -+ address entry = __ pc(); ++ // unresolved class in error state - call into runtime to throw the error ++ // from the first resolution attempt ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(x13, t1, call_ldc); + -+ __ andi(esp, esp, -16); -+ __ mv(c_rarg3, esp); -+ // xmethod -+ // xlocals -+ // c_rarg3: first stack arg - wordSize -+ // adjust sp ++ // resolved class - need to call vm to get java mirror of the class ++ __ mv(t1, (u1)JVM_CONSTANT_Class); ++ __ bne(x13, t1, notClass); + -+ __ addi(sp, c_rarg3, -18 * wordSize); -+ __ addi(sp, sp, -2 * wordSize); -+ __ sd(ra, Address(sp, 0)); ++ __ bind(call_ldc); ++ __ mv(c_rarg1, wide); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); ++ __ push_ptr(x10); ++ __ verify_oop(x10); ++ __ j(Done); + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::slow_signature_handler), -+ xmethod, xlocals, c_rarg3); ++ __ bind(notClass); ++ __ mv(t1, (u1)JVM_CONSTANT_Float); ++ __ bne(x13, t1, notFloat); + -+ // x10: result handler ++ // ftos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ flw(f10, Address(x11, base_offset)); ++ __ push_f(f10); ++ __ j(Done); + -+ // Stack layout: -+ // sp: return address <- sp -+ // 1 garbage -+ // 8 integer args (if static first is unused) -+ // 1 float/double identifiers -+ // 8 double args -+ // stack args <- esp -+ // garbage -+ // expression stack bottom -+ // bcp (NULL) -+ // ... ++ __ bind(notFloat); + -+ // Restore ra -+ __ ld(ra, Address(sp, 0)); -+ __ addi(sp, sp , 2 * wordSize); ++ __ mv(t1, (u1)JVM_CONSTANT_Integer); ++ __ bne(x13, t1, notInt); + -+ // Do FP first so we can use c_rarg3 as temp -+ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers ++ // itos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ lw(x10, Address(x11, base_offset)); ++ __ push_i(x10); ++ __ j(Done); + -+ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { -+ const FloatRegister r = g_FPArgReg[i]; -+ Label d, done; ++ __ bind(notInt); ++ condy_helper(Done); + -+ __ andi(t0, c_rarg3, 1UL << i); -+ __ bnez(t0, d); -+ __ flw(r, Address(sp, (10 + i) * wordSize)); -+ __ j(done); -+ __ bind(d); -+ __ fld(r, Address(sp, (10 + i) * wordSize)); -+ __ bind(done); -+ } ++ __ bind(Done); ++} + -+ // c_rarg0 contains the result from the call of -+ // InterpreterRuntime::slow_signature_handler so we don't touch it -+ // here. It will be loaded with the JNIEnv* later. -+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { -+ const Register rm = g_INTArgReg[i]; -+ __ ld(rm, Address(sp, i * wordSize)); -+ } ++// Fast path for caching oop constants. 
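ldc above dispatches on the constant-pool tag: class-like tags go out to InterpreterRuntime::ldc, float and int entries are pushed straight from the pool, and everything else, including dynamic constants, funnels into condy_helper. A rough stand-alone sketch of that dispatch; the tag values and pool layout here are made up for illustration and are not the JVM's:

#include <cstdio>
#include <variant>
#include <vector>

enum class Tag { Integer, Float, Clazz, UnresolvedClass, Other };
struct PoolEntry { Tag tag; std::variant<int, float> prim; };

// Mirror of the branch structure in the generated ldc code.
void ldc_dispatch(const std::vector<PoolEntry>& pool, int index) {
  const PoolEntry& e = pool[index];
  switch (e.tag) {
    case Tag::Integer: std::printf("push_i %d\n", std::get<int>(e.prim));   break;
    case Tag::Float:   std::printf("push_f %f\n", std::get<float>(e.prim)); break;
    case Tag::Clazz:
    case Tag::UnresolvedClass:
      std::printf("call InterpreterRuntime::ldc and push the mirror\n");    break;
    default:
      std::printf("fall through to the condy path\n");                      break;
  }
}

int main() {
  std::vector<PoolEntry> pool = { {Tag::Integer, 42}, {Tag::Float, 1.5f}, {Tag::Clazz, 0} };
  for (int i = 0; i < 3; ++i) ldc_dispatch(pool, i);
  return 0;
}

fast_aldc below is the fast path named in the comment above: it consults the resolved-references cache first, calls InterpreterRuntime::resolve_ldc only on a miss, and maps the null sentinel back to a genuine NULL before pushing the result.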
++void TemplateTable::fast_aldc(bool wide) ++{ ++ transition(vtos, atos); + -+ __ addi(sp, sp, 18 * wordSize); -+ __ ret(); ++ const Register result = x10; ++ const Register tmp = x11; ++ const Register rarg = x12; + -+ return entry; -+} ++ const int index_size = wide ? sizeof(u2) : sizeof(u1); + -+// Various method entries -+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { -+ // xmethod: Method* -+ // x30: sender sp -+ // esp: args ++ Label resolved; + -+ if (!InlineIntrinsics) { -+ return NULL; // Generate a vanilla entry -+ } ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bnez(result, resolved); + -+ // These don't need a safepoint check because they aren't virtually -+ // callable. We won't enter these intrinsics from compiled code. -+ // If in the future we added an intrinsic which was virtually callable -+ // we'd have to worry about how to safepoint so that this code is used. -+ -+ // mathematical functions inlined by compiler -+ // (interpreter must provide identical implementation -+ // in order to avoid monotonicity bugs when switching -+ // from interpreter to compiler in the middle of some -+ // computation) -+ // -+ // stack: -+ // [ arg ] <-- esp -+ // [ arg ] -+ // retaddr in ra -+ -+ address fn = NULL; -+ address entry_point = NULL; -+ Register continuation = ra; -+ switch (kind) { -+ case Interpreter::java_lang_math_abs: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fabs_d(f10, f10); -+ __ mv(sp, x30); // Restore caller's SP -+ break; -+ case Interpreter::java_lang_math_sqrt: -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ fsqrt_d(f10, f10); -+ __ mv(sp, x30); -+ break; -+ case Interpreter::java_lang_math_sin : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dsin() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_cos : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dcos() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_tan : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dtan() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_log : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case 
Interpreter::java_lang_math_log10 : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dlog10() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_exp : -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp)); -+ __ mv(sp, x30); -+ __ mv(x9, ra); -+ continuation = x9; // The first callee-saved register -+ if (StubRoutines::dexp() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_pow : -+ entry_point = __ pc(); -+ __ mv(x9, ra); -+ continuation = x9; -+ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp)); -+ __ mv(sp, x30); -+ if (StubRoutines::dpow() == NULL) { -+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); -+ } else { -+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); -+ } -+ __ mv(t0, fn); -+ __ jalr(t0); -+ break; -+ case Interpreter::java_lang_math_fmaD : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); -+ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ fld(f12, Address(esp)); -+ __ fmadd_d(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ case Interpreter::java_lang_math_fmaF : -+ if (UseFMA) { -+ entry_point = __ pc(); -+ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); -+ __ flw(f11, Address(esp, Interpreter::stackElementSize)); -+ __ flw(f12, Address(esp)); -+ __ fmadd_s(f10, f10, f11, f12); -+ __ mv(sp, x30); // Restore caller's SP -+ } -+ break; -+ default: -+ ; -+ } -+ if (entry_point != NULL) { -+ __ jr(continuation); -+ } ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + -+ return entry_point; -+} ++ // first time invocation - must resolve first ++ __ mv(rarg, (int)bytecode()); ++ __ call_VM(result, entry, rarg); + -+// Abstract method entry -+// Attempt to execute abstract method. Throw exception -+address TemplateInterpreterGenerator::generate_abstract_entry(void) { -+ // xmethod: Method* -+ // x30: sender SP ++ __ bind(resolved); + -+ address entry_point = __ pc(); ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; + -+ // abstract method entry ++ // Stash null_sentinel address to get its value later ++ int32_t offset = 0; ++ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); ++ __ ld(tmp, Address(rarg, offset)); ++ __ bne(result, tmp, notNull); ++ __ mv(result, zr); // NULL object reference ++ __ bind(notNull); ++ } + -+ // pop return address, reset last_sp to NULL -+ __ empty_expression_stack(); -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ if (VerifyOops) { ++ // Safe to call with 0 result ++ __ verify_oop(result); ++ } ++} + -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), -+ xmethod); -+ // the call_VM checks for exception, so we should never return here. 
-+ __ should_not_reach_here(); ++void TemplateTable::ldc2_w() ++{ ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ __ get_unsigned_2_byte_index_at_bcp(x10, 1); + -+ return entry_point; -+} ++ __ get_cpool_and_tags(x11, x12); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); + -+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { -+ address entry = __ pc(); ++ // get type ++ __ add(x12, x12, x10); ++ __ load_unsigned_byte(x12, Address(x12, tags_offset)); ++ __ mv(t1, JVM_CONSTANT_Double); ++ __ bne(x12, t1, notDouble); + -+#ifdef ASSERT -+ { -+ Label L; -+ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); -+ __ mv(t1, sp); -+ // maximal sp for current fp (stack grows negative) -+ // check if frame is complete -+ __ bge(t0, t1, L); -+ __ stop ("interpreter frame not set up"); -+ __ bind(L); -+ } -+#endif // ASSERT -+ // Restore bcp under the assumption that the current frame is still -+ // interpreted -+ __ restore_bcp(); ++ // dtos ++ __ shadd(x12, x10, x11, x12, 3); ++ __ fld(f10, Address(x12, base_offset)); ++ __ push_d(f10); ++ __ j(Done); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // throw exception -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); -+ return entry; -+} ++ __ bind(notDouble); ++ __ mv(t1, (int)JVM_CONSTANT_Long); ++ __ bne(x12, t1, notLong); + -+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { -+ address entry = __ pc(); -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); -+ // setup parameters ++ // ltos ++ __ shadd(x10, x10, x11, x10, 3); ++ __ ld(x10, Address(x10, base_offset)); ++ __ push_l(x10); ++ __ j(Done); + -+ // convention: expect aberrant index in register x11 -+ __ zero_extend(c_rarg2, x11, 32); -+ // convention: expect array in register x13 -+ __ mv(c_rarg1, x13); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ throw_ArrayIndexOutOfBoundsException), -+ c_rarg1, c_rarg2); -+ return entry; ++ __ bind(notLong); ++ condy_helper(Done); ++ __ bind(Done); +} + -+address TemplateInterpreterGenerator::generate_ClassCastException_handler() { -+ address entry = __ pc(); ++void TemplateTable::condy_helper(Label& Done) ++{ ++ const Register obj = x10; ++ const Register rarg = x11; ++ const Register flags = x12; ++ const Register off = x13; + -+ // object is at TOS -+ __ pop_reg(c_rarg1); ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); ++ __ mv(rarg, (int) bytecode()); ++ __ call_VM(obj, entry, rarg); + -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ throw_ClassCastException), -+ c_rarg1); -+ return entry; -+} ++ __ get_vm_result_2(flags, xthread); + -+address TemplateInterpreterGenerator::generate_exception_handler_common( -+ const char* name, const char* message, bool pass_oop) { -+ assert(!pass_oop || message == NULL, "either oop or message but not both"); -+ address entry = __ pc(); -+ if (pass_oop) { -+ // object is at TOS -+ __ pop_reg(c_rarg2); -+ } -+ // expression stack must be empty before entering the VM if an -+ // exception happened -+ __ empty_expression_stack(); 
-+ // setup parameters -+ __ la(c_rarg1, Address((address)name)); -+ if (pass_oop) { -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime:: -+ create_klass_exception), -+ c_rarg1, c_rarg2); -+ } else { -+ // kind of lame ExternalAddress can't take NULL because -+ // external_word_Relocation will assert. -+ if (message != NULL) { -+ __ la(c_rarg2, Address((address)message)); -+ } else { -+ __ mv(c_rarg2, NULL_WORD); -+ } -+ __ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), -+ c_rarg1, c_rarg2); -+ } -+ // throw exception -+ __ j(address(Interpreter::throw_exception_entry())); -+ return entry; -+} ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ mv(off, flags); ++ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); ++ __ andrw(off, off, t0); + -+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { -+ address entry = __ pc(); ++ __ add(off, obj, off); ++ const Address field(off, 0); // base + R---->base + offset + -+ // Restore stack bottom in case i2c adjusted stack -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // and NULL it as marker that esp is now tos until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 + -+ if (state == atos) { -+ Register obj = x10; -+ Register mdp = x11; -+ Register tmp = x12; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ profile_return_type(mdp, obj, tmp); -+ } ++ switch (bytecode()) { ++ case Bytecodes::_ldc: // fall through ++ case Bytecodes::_ldc_w: { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ mv(t1, itos); ++ __ bne(flags, t1, notInt); ++ // itos ++ __ lw(x10, field); ++ __ push(itos); ++ __ j(Done); + -+ // Pop N words from the stack -+ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); -+ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); -+ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); ++ __ bind(notInt); ++ __ mv(t1, ftos); ++ __ bne(flags, t1, notFloat); ++ // ftos ++ __ load_float(field); ++ __ push(ftos); ++ __ j(Done); + -+ __ shadd(esp, x11, esp, t0, 3); ++ __ bind(notFloat); ++ __ mv(t1, stos); ++ __ bne(flags, t1, notShort); ++ // stos ++ __ load_signed_short(x10, field); ++ __ push(stos); ++ __ j(Done); + -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, -+ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ __ bind(notShort); ++ __ mv(t1, btos); ++ __ bne(flags, t1, notByte); ++ // btos ++ __ load_signed_byte(x10, field); ++ __ push(btos); ++ __ j(Done); + -+ __ check_and_handle_popframe(xthread); -+ __ check_and_handle_earlyret(xthread); ++ __ bind(notByte); ++ __ mv(t1, ctos); ++ __ bne(flags, t1, notChar); ++ // ctos ++ __ 
load_unsigned_short(x10, field); ++ __ push(ctos); ++ __ j(Done); + -+ __ get_dispatch(); -+ __ dispatch_next(state, step); ++ __ bind(notChar); ++ __ mv(t1, ztos); ++ __ bne(flags, t1, notBool); ++ // ztos ++ __ load_signed_byte(x10, field); ++ __ push(ztos); ++ __ j(Done); + -+ return entry; -+} ++ __ bind(notBool); ++ break; ++ } + -+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, -+ int step, -+ address continuation) { -+ address entry = __ pc(); -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ case Bytecodes::_ldc2_w: { ++ Label notLong, notDouble; ++ __ mv(t1, ltos); ++ __ bne(flags, t1, notLong); ++ // ltos ++ __ ld(x10, field); ++ __ push(ltos); ++ __ j(Done); + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ __ bind(notLong); ++ __ mv(t1, dtos); ++ __ bne(flags, t1, notDouble); ++ // dtos ++ __ load_double(field); ++ __ push(dtos); ++ __ j(Done); + -+ // Restore expression stack pointer -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // NULL last_sp until next java call -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ bind(notDouble); ++ break; ++ } + -+ // handle exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); -+ __ should_not_reach_here(); -+ __ bind(L); ++ default: ++ ShouldNotReachHere(); + } + -+ if (continuation == NULL) { -+ __ dispatch_next(state, step); -+ } else { -+ __ jump_to_entry(continuation); -+ } -+ return entry; ++ __ stop("bad ldc/condy"); +} + -+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { -+ address entry = __ pc(); -+ if (type == T_OBJECT) { -+ // retrieve result from frame -+ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // and verify it -+ __ verify_oop(x10); -+ } else { -+ __ cast_primitive_type(type, x10); -+ } ++void TemplateTable::locals_index(Register reg, int offset) ++{ ++ __ lbu(reg, at_bcp(offset)); ++ __ neg(reg, reg); ++} + -+ __ ret(); // return from result handler -+ return entry; ++void TemplateTable::iload() { ++ iload_internal(); +} + -+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, -+ address runtime_entry) { -+ assert_cond(runtime_entry != NULL); -+ address entry = __ pc(); -+ __ push(state); -+ __ call_VM(noreg, runtime_entry); -+ __ fence(0xf, 0xf); -+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); -+ return entry; ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); +} + -+// Helpers for commoning out cases in the various type of method entries. 
-+// ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + -+// increment invocation count & check for overflow -+// -+// Note: checking for negative value instead of overflow -+// so we have a 'sticky' overflow test -+// -+// xmethod: method -+// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { -+ Label done; -+ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+} -+ -+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -+ __ mv(c_rarg1, zr); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); -+ __ j(do_continue); -+} ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ mv(t1, Bytecodes::_iload); ++ __ beq(x11, t1, done); + -+// See if we've got enough room on the stack for locals plus overhead -+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError -+// without going through the signal handler, i.e., reserved and yellow zones -+// will not be made usable. The shadow zone must suffice to handle the -+// overflow. -+// The expression stack grows down incrementally, so the normal guard -+// page mechanism will work for that. -+// -+// NOTE: Since the additional locals are also always pushed (wasn't -+// obvious in generate_method_entry) so the guard should work for them -+// too. 
-+// -+// Args: -+// x13: number of additional locals this frame needs (what we must check) -+// xmethod: Method* -+// -+// Kills: -+// x10 -+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // if _fast_iload rewrite to _fast_iload2 ++ __ mv(t1, Bytecodes::_fast_iload); ++ __ mv(bc, Bytecodes::_fast_iload2); ++ __ beq(x11, t1, rewrite); + -+ // monitor entry size: see picture of stack set -+ // (generate_method_entry) and frame_amd64.hpp -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ // if _caload rewrite to _fast_icaload ++ __ mv(t1, Bytecodes::_caload); ++ __ mv(bc, Bytecodes::_fast_icaload); ++ __ beq(x11, t1, rewrite); + -+ // total overhead size: entry_size + (saved fp through expr stack -+ // bottom). be sure to change this if you add/subtract anything -+ // to/from the overhead area -+ const int overhead_size = -+ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; ++ // else rewrite to _fast_iload ++ __ mv(bc, Bytecodes::_fast_iload); + -+ const int page_size = os::vm_page_size(); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, bc, x11, false); ++ __ bind(done); + -+ Label after_frame_check; ++ } + -+ // see if the frame is greater than one page in size. If so, -+ // then we need to verify there is enough stack space remaining -+ // for the additional locals. -+ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); -+ __ bleu(x13, t0, after_frame_check); ++ // do iload, get the local value into tos ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} + -+ // compute sp as if this were going to be the last frame on -+ // the stack before the red zone ++void TemplateTable::fast_iload2() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++ __ push(itos); ++ locals_index(x11, 3); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} + -+ // locals + overhead, in bytes -+ __ mv(x10, overhead_size); -+ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. ++void TemplateTable::fast_iload() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} + -+ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); -+ __ ld(t0, stack_limit); ++void TemplateTable::lload() ++{ ++ transition(vtos, ltos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+#ifdef ASSERT -+ Label limit_okay; -+ // Verify that thread stack limit is non-zero. -+ __ bnez(t0, limit_okay); -+ __ stop("stack overflow limit is zero"); -+ __ bind(limit_okay); -+#endif ++void TemplateTable::fload() ++{ ++ transition(vtos, ftos); ++ locals_index(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} + -+ // Add stack limit to locals. -+ __ add(x10, x10, t0); ++void TemplateTable::dload() ++{ ++ transition(vtos, dtos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+ // Check against the current stack bottom. -+ __ bgtu(sp, x10, after_frame_check); ++void TemplateTable::aload() ++{ ++ transition(vtos, atos); ++ locals_index(x11); ++ __ ld(x10, iaddress(x11, x10, _masm)); + -+ // Remove the incoming args, peeling the machine SP back to where it -+ // was in the caller. 
This is not strictly necessary, but unless we -+ // do so the stack frame may have a garbage FP; this ensures a -+ // correct call stack that we can always unwind. The ANDI should be -+ // unnecessary because the sender SP in x30 is always aligned, but -+ // it doesn't hurt. -+ __ andi(sp, x30, -16); ++} + -+ // Note: the restored frame is not necessarily interpreted. -+ // Use the shared runtime version of the StackOverflowError. -+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); -+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); ++void TemplateTable::locals_index_wide(Register reg) { ++ __ lhu(reg, at_bcp(2)); ++ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend ++ __ neg(reg, reg); ++} + -+ // all done with frame size check -+ __ bind(after_frame_check); ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(x11); ++ __ lw(x10, iaddress(x11, t0, _masm)); +} + -+// Allocate monitor and lock method (asm interpreter) -+// -+// Args: -+// xmethod: Method* -+// xlocals: locals -+// -+// Kills: -+// x10 -+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) -+// t0, t1 (temporary regs) -+void TemplateInterpreterGenerator::lock_method() { -+ // synchronize method -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++void TemplateTable::wide_lload() ++{ ++ transition(vtos, ltos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); -+#endif // ASSERT ++void TemplateTable::wide_fload() ++{ ++ transition(vtos, ftos); ++ locals_index_wide(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} + -+ // get synchronization object -+ { -+ Label done; -+ __ lwu(x10, access_flags); -+ __ andi(t0, x10, JVM_ACC_STATIC); -+ // get receiver (assume this is frequent case) -+ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); -+ __ beqz(t0, done); -+ __ load_mirror(x10, xmethod); ++void TemplateTable::wide_dload() ++{ ++ transition(vtos, dtos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} + -+#ifdef ASSERT -+ { -+ Label L; -+ __ bnez(x10, L); -+ __ stop("synchronization object is NULL"); -+ __ bind(L); -+ } -+#endif // ASSERT ++void TemplateTable::wide_aload() ++{ ++ transition(vtos, atos); ++ locals_index_wide(x11); ++ __ ld(x10, aaddress(x11, t0, _masm)); ++} + -+ __ bind(done); ++void TemplateTable::index_check(Register array, Register index) ++{ ++ // destroys x11, t0 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ // sign extend index for use by indexed load ++ // check index ++ const Register length = t0; ++ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); ++ if (index != x11) { ++ assert(x11 != array, "different registers"); ++ __ mv(x11, index); + } ++ Label ok; ++ __ addw(index, 
index, zr); ++ __ bltu(index, length, ok); ++ __ mv(x13, array); ++ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ jr(t0); ++ __ bind(ok); ++} + -+ // add space for monitor & lock -+ __ add(sp, sp, - entry_size); // add space for a monitor entry -+ __ add(esp, esp, - entry_size); -+ __ mv(t0, esp); -+ __ sd(t0, monitor_block_top); // set new monitor block top -+ // store object -+ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); -+ __ mv(c_rarg1, esp); // object address -+ __ lock_object(c_rarg1); ++void TemplateTable::iaload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended +} + -+// Generate a fixed interpreter frame. This is identical setup for -+// interpreted methods and for native methods hence the shared code. -+// -+// Args: -+// ra: return address -+// xmethod: Method* -+// xlocals: pointer to locals -+// xcpool: cp cache -+// stack_pointer: previous sp -+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { -+ // initialize fixed part of activation frame -+ if (native_call) { -+ __ add(esp, sp, - 14 * wordSize); -+ __ mv(xbcp, zr); -+ __ add(sp, sp, - 14 * wordSize); -+ // add 2 zero-initialized slots for native calls -+ __ sd(zr, Address(sp, 13 * wordSize)); -+ __ sd(zr, Address(sp, 12 * wordSize)); -+ } else { -+ __ add(esp, sp, - 12 * wordSize); -+ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod -+ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase -+ __ add(sp, sp, - 12 * wordSize); -+ } -+ __ sd(xbcp, Address(sp, wordSize)); -+ __ sd(esp, Address(sp, 0)); ++void TemplateTable::laload() ++{ ++ transition(itos, ltos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ if (ProfileInterpreter) { -+ Label method_data_continue; -+ __ ld(t0, Address(xmethod, Method::method_data_offset())); -+ __ beqz(t0, method_data_continue); -+ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); -+ __ bind(method_data_continue); -+ } ++void TemplateTable::faload() ++{ ++ transition(itos, ftos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ __ sd(xmethod, Address(sp, 7 * wordSize)); -+ __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); ++void TemplateTable::daload() ++{ ++ transition(itos, dtos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // Get mirror and store it in the frame as GC root for this Method* -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); ++void TemplateTable::aaload() ++{ ++ transition(itos, atos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); ++ do_oop_load(_masm, ++ Address(x10), ++ x10, ++ IS_ARRAY); ++} + -+ __ ld(xcpool, Address(xmethod, Method::const_offset())); -+ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); -+ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); -+ __ sd(xcpool, Address(sp, 3 * wordSize)); -+ __ sd(xlocals, Address(sp, 2 * wordSize)); ++void TemplateTable::baload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ __ sd(ra, Address(sp, 11 * wordSize)); -+ __ sd(fp, Address(sp, 10 * wordSize)); -+ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp ++void TemplateTable::caload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+ // set sender sp -+ // leave last_sp as null -+ __ sd(x30, Address(sp, 9 * wordSize)); -+ __ sd(zr, Address(sp, 8 * wordSize)); ++// iload followed by caload frequent pair ++void TemplateTable::fast_icaload() ++{ ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(x12); ++ __ lw(x11, iaddress(x12, x11, _masm)); ++ __ pop_ptr(x10); + -+ // Move SP out of the way -+ if (!native_call) { -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); -+ __ slli(t0, t0, 3); -+ __ sub(t0, sp, t0); -+ __ andi(sp, t0, -16); -+ } ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + -+// End of helpers -+ -+// Various method entries -+//------------------------------------------------------------------------------------------------------------------------ -+// -+// ++void TemplateTable::saload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 
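Every array template above forms the element address the same way: the arrayOopDesc header offset is folded into the index first so a single shadd can finish the job, because base_offset + index * elem_size equals (index + base_offset / elem_size) << log2(elem_size) whenever the header offset is a multiple of the element size. The add and shadd emitted next for saload apply exactly this with a 2-byte scale. A small self-checking sketch of the identity; kBaseOffset is an assumed value, not the real arrayOopDesc layout:

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t array       = 0x10000;  // hypothetical array address
  const uintptr_t kBaseOffset = 16;       // assumed header size, a multiple of the element size
  const int       log2_size   = 1;        // 2-byte elements (T_SHORT / T_CHAR)

  for (uintptr_t index = 0; index < 1000; ++index) {
    uintptr_t folded = array + ((index + (kBaseOffset >> log2_size)) << log2_size); // add, then shadd
    uintptr_t plain  = array + kBaseOffset + (index << log2_size);                  // textbook form
    assert(folded == plain);
  }
  return 0;
}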
++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); ++} + -+// Method entry for java.lang.ref.Reference.get. -+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { -+ // Code: _aload_0, _getfield, _areturn -+ // parameter size = 1 -+ // -+ // The code that gets generated by this routine is split into 2 parts: -+ // 1. The "intrinsified" code for G1 (or any SATB based GC), -+ // 2. The slow path - which is an expansion of the regular method entry. -+ // -+ // Notes:- -+ // * In the G1 code we do not check whether we need to block for -+ // a safepoint. If G1 is enabled then we must execute the specialized -+ // code for Reference.get (except when the Reference object is null) -+ // so that we can log the value in the referent field with an SATB -+ // update buffer. -+ // If the code for the getfield template is modified so that the -+ // G1 pre-barrier code is executed when the current method is -+ // Reference.get() then going through the normal method entry -+ // will be fine. -+ // * The G1 code can, however, check the receiver object (the instance -+ // of java.lang.Reference) and jump to the slow path if null. If the -+ // Reference object is null then we obviously cannot fetch the referent -+ // and so we don't need to call the G1 pre-barrier. Thus we can use the -+ // regular method entry code to generate the NPE. -+ // -+ // This code is based on generate_accessor_entry. -+ // -+ // xmethod: Method* -+ // x30: senderSP must preserve for slow path, set SP to it on fast path ++void TemplateTable::iload(int n) ++{ ++ transition(vtos, itos); ++ __ lw(x10, iaddress(n)); ++} + -+ // ra is live. It must be saved around calls. ++void TemplateTable::lload(int n) ++{ ++ transition(vtos, ltos); ++ __ ld(x10, laddress(n)); ++} + -+ address entry = __ pc(); ++void TemplateTable::fload(int n) ++{ ++ transition(vtos, ftos); ++ __ flw(f10, faddress(n)); ++} + -+ const int referent_offset = java_lang_ref_Reference::referent_offset(); -+ guarantee(referent_offset > 0, "referent offset not initialized"); ++void TemplateTable::dload(int n) ++{ ++ transition(vtos, dtos); ++ __ fld(f10, daddress(n)); ++} + -+ Label slow_path; -+ const Register local_0 = c_rarg0; -+ // Check if local 0 != NULL -+ // If the receiver is null then it is OK to jump to the slow path. -+ __ ld(local_0, Address(esp, 0)); -+ __ beqz(local_0, slow_path); ++void TemplateTable::aload(int n) ++{ ++ transition(vtos, atos); ++ __ ld(x10, iaddress(n)); ++} + -+ __ mv(x9, x30); // Move senderSP to a callee-saved register ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} + -+ // Load the value of the referent field. 
-+ const Address field_address(local_0, referent_offset); -+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0); ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} + -+ // areturn -+ __ andi(sp, x9, -16); // done with stack -+ __ ret(); -+ -+ // generate a vanilla interpreter entry as the slow path -+ __ bind(slow_path); -+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); -+ return entry; -+} -+ -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.update(int crc, int b) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_update_entry() { -+ // TODO: Unimplemented generate_CRC32_update_entry -+ return 0; -+} ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; + -+/** -+ * Method entry for static native methods: -+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) -+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) -+ */ -+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32_updateBytes_entry -+ return 0; -+} ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + -+/** -+ * Method entry for intrinsic-candidate (non-native) methods: -+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) -+ * Unlike CRC32, CRC32C does not have any methods marked as native -+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses -+ */ -+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { -+ // TODO: Unimplemented generate_CRC32C_updateBytes_entry -+ return 0; -+} ++ // if _getfield then wait with rewrite ++ __ mv(t1, Bytecodes::Bytecodes::_getfield); ++ __ beq(x11, t1, done); + -+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // See more discussion in stackOverflow.hpp. 
++ // if _igetfield then rewrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_igetfield); ++ __ mv(bc, Bytecodes::_fast_iaccess_0); ++ __ beq(x11, t1, rewrite); + -+ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); -+ const int page_size = os::vm_page_size(); -+ const int n_shadow_pages = shadow_zone_size / page_size; ++ // if _agetfield then rewrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_agetfield); ++ __ mv(bc, Bytecodes::_fast_aaccess_0); ++ __ beq(x11, t1, rewrite); + -+#ifdef ASSERT -+ Label L_good_limit; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bnez(t0, L_good_limit); -+ __ stop("shadow zone safe limit is not initialized"); -+ __ bind(L_good_limit); -+ -+ Label L_good_watermark; -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bnez(t0, L_good_watermark); -+ __ stop("shadow zone growth watermark is not initialized"); -+ __ bind(L_good_watermark); -+#endif ++ // if _fgetfield then rewrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_fgetfield); ++ __ mv(bc, Bytecodes::_fast_faccess_0); ++ __ beq(x11, t1, rewrite); + -+ Label L_done; ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); + -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -+ __ bgtu(sp, t0, L_done); ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); + -+ for (int p = 1; p <= n_shadow_pages; p++) { -+ __ bang_stack_with_offset(p * page_size); ++ __ bind(done); + } + -+ // Record the new watermark, but only if the update is above the safe limit. -+ // Otherwise, the next time around the check above would pass the safe limit. -+ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -+ __ bleu(sp, t0, L_done); -+ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). ++ aload(0); ++} + -+ __ bind(L_done); ++void TemplateTable::istore() ++{ ++ transition(itos, vtos); ++ locals_index(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); +} + -+// Interpreter stub for calling a native method. (asm interpreter) -+// This sets up a somewhat different looking stack for calling the -+// native method than the typical interpreter frame setup. 
-+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { -+ // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++void TemplateTable::lstore() ++{ ++ transition(ltos, vtos); ++ locals_index(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} + -+ // x11: Method* -+ // x30: sender sp ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(x11); ++ __ fsw(f10, iaddress(x11, t0, _masm)); ++} + -+ address entry_point = __ pc(); ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} + -+ const Address constMethod (xmethod, Method::const_offset()); -+ const Address access_flags (xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x12, ConstMethod:: -+ size_of_parameters_offset()); ++void TemplateTable::astore() ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} + -+ // get parameter size (always needed) -+ __ ld(x12, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(); ++ locals_index_wide(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); ++} + -+ // Native calls don't need the stack size check since they have no -+ // expression stack and the arguments are already on the stack and -+ // we only add a handful of words to the stack. ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(); ++ locals_index_wide(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} + -+ // xmethod: Method* -+ // x12: size of parameters -+ // x30: sender sp ++void TemplateTable::wide_fstore() { ++ transition(vtos, vtos); ++ __ pop_f(); ++ locals_index_wide(x11); ++ __ fsw(f10, faddress(x11, t0, _masm)); ++} + -+ // for natives the size of locals is zero ++void TemplateTable::wide_dstore() { ++ transition(vtos, vtos); ++ __ pop_d(); ++ locals_index_wide(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} + -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, xlocals, 3); -+ __ addi(xlocals, xlocals, -wordSize); ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index_wide(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} + -+ // Pull SP back to minimum size: this avoids holes in the stack -+ __ andi(sp, esp, -16); ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ // initialize fixed part of activation frame -+ generate_fixed_frame(true); ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ // make sure method is native & not abstract -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); -+ __ 
verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); ++} + -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); ++} + -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(x10, at_tos()); // value ++ __ ld(x12, at_tos_p1()); // index ++ __ ld(x13, at_tos_p2()); // array + -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); -+ } ++ index_check(x13, x12); // kills x11 ++ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); ++ Address element_address(x14, 0); + -+ bang_stack_shadow_pages(true); ++ // do array store check - check for NULL value first ++ __ beqz(x10, is_null); + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++ // Move subklass into x11 ++ __ load_klass(x11, x10); ++ // Move superklass into x10 ++ __ load_klass(x10, x13); ++ __ ld(x10, Address(x10, ++ ObjArrayKlass::element_klass_offset())); ++ // Compress array + index * oopSize + 12 into a single register. Frees x12. + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. -+ if (synchronized) { -+ lock_method(); -+ } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif -+ } ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x11. 
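This is the standard aastore store check: a null value is stored with no type check (after recording profile_null_seen), a non-null value must have a klass that is a subtype of the array's element klass, and anything else raises ArrayStoreException; gen_subtype_check below branches to ok_is_subtype for the passing case. A plain C++ sketch of that decision, using a stand-in Klass model rather than the VM's:

#include <cstdio>
#include <stdexcept>

struct Klass { const Klass* super; };

// Walk a simplified superclass chain; the real generated check uses the
// super-check offset and secondary supers, but answers the same question.
static bool is_subtype_of(const Klass* sub, const Klass* super) {
  for (const Klass* k = sub; k != nullptr; k = k->super) {
    if (k == super) return true;
  }
  return false;
}

static void aastore_check(const Klass* element_klass, const Klass* value_klass) {
  if (value_klass == nullptr) {               // NULL value: store NULL, no check needed
    std::puts("store NULL");
    return;
  }
  if (!is_subtype_of(value_klass, element_klass)) {
    throw std::runtime_error("ArrayStoreException");
  }
  std::puts("store oop through the GC barrier");  // do_oop_store(..., IS_ARRAY)
}

int main() {
  Klass object{nullptr}, string{&object}, integer{&object};
  aastore_check(&object, &string);     // a String value into an Object[]: allowed
  aastore_check(&string, nullptr);     // null into a String[]: allowed
  try {
    aastore_check(&string, &integer);  // an Integer value into a String[]: throws
  } catch (const std::exception& e) {
    std::puts(e.what());
  }
  return 0;
}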
++ __ gen_subtype_check(x11, ok_is_subtype); //todo + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif ++ // Come here on failure ++ // object is at TOS ++ __ j(Interpreter::_throw_ArrayStoreException_entry); + -+ // jvmti support -+ __ notify_method_entry(); ++ // Come here on success ++ __ bind(ok_is_subtype); + -+ // work registers -+ const Register t = x18; -+ const Register result_handler = x19; ++ // Get the value we will store ++ __ ld(x10, at_tos()); ++ // Now store using the appropriate barrier ++ do_oop_store(_masm, element_address, x10, IS_ARRAY); ++ __ j(done); + -+ // allocate space for parameters -+ __ ld(t, Address(xmethod, Method::const_offset())); -+ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); ++ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(x12); + -+ __ slli(t, t, Interpreter::logStackElementSize); -+ __ sub(x30, esp, t); -+ __ andi(sp, x30, -16); -+ __ mv(esp, x30); ++ // Store a NULL ++ do_oop_store(_masm, element_address, noreg, IS_ARRAY); + -+ // get signature handler -+ { -+ Label L; -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bnez(t, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ ld(t, Address(xmethod, Method::signature_handler_offset())); -+ __ bind(L); -+ } ++ // Pop stack arguments ++ __ bind(done); ++ __ add(esp, esp, 3 * Interpreter::stackElementSize); + -+ // call signature handler -+ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, -+ "adjust this code"); -+ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, -+ "adjust this code"); ++} + -+ // The generated handlers do not touch xmethod (the method). -+ // However, large signatures cannot be cached and are generated -+ // each time here. The slow-path generator can do a GC on return, -+ // so we must reload it after the call. -+ __ jalr(t); -+ __ get_method(xmethod); // slow path can do a GC, reload xmethod ++void TemplateTable::bastore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 + ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
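The aastore sequence above boils down to the following control flow: a null value is stored without any type check, a non-null value must be assignable to the array's element type, and a failed check raises ArrayStoreException before any store happens. A minimal C++ model, with Klass and is_subtype_of as heavily simplified stand-ins for the VM's real subtype test:

#include <stdexcept>

struct Klass { const Klass* super; };           // simplified: no interfaces or array covariance

static bool is_subtype_of(const Klass* sub, const Klass* sup) {
    for (const Klass* k = sub; k != nullptr; k = k->super) {
        if (k == sup) return true;
    }
    return false;
}

// Mirrors the aastore control flow: is_null path stores without a check,
// otherwise the element must be a subtype of the array's element klass.
static void aastore_check(const Klass* value_klass,       // nullptr models a null reference
                          const Klass* element_klass) {
    if (value_klass == nullptr) {
        return;                                            // store NULL, no subtype check
    }
    if (!is_subtype_of(value_klass, element_klass)) {
        throw std::runtime_error("ArrayStoreException");   // _throw_ArrayStoreException_entry
    }
    // ok_is_subtype: perform the oop store through the GC barrier (do_oop_store)
}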
++ __ load_klass(x12, x13); ++ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); ++ Label L_skip; ++ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); ++ __ beqz(t0, L_skip); ++ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 ++ __ bind(L_skip); + -+ // result handler is in x10 -+ // set result handler -+ __ mv(result_handler, x10); -+ // pass mirror handle if static call -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_STATIC); -+ __ beqz(t0, L); -+ // get mirror -+ __ load_mirror(t, xmethod); -+ // copy mirror into activation frame -+ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // pass handle to mirror -+ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); -+ __ bind(L); -+ } ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + -+ // get native function entry point in x28 -+ { -+ Label L; -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); -+ __ mv(t1, unsatisfied); -+ __ ld(t1, t1); -+ __ bne(x28, t1, L); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::prepare_native_call), -+ xmethod); -+ __ get_method(xmethod); -+ __ ld(x28, Address(xmethod, Method::native_function_offset())); -+ __ bind(L); -+ } ++ __ add(x11, x13, x11); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); ++} + -+ // pass JNIEnv -+ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); ++void TemplateTable::castore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(t0, x11, x13, t0, 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++} + -+ // It is enough that the pc() points into the right code -+ // segment. It does not have to be the correct return pc. -+ Label native_return; -+ __ set_last_Java_frame(esp, fp, native_return, x30); ++void TemplateTable::sastore() ++{ ++ castore(); ++} + -+ // change thread state -+#ifdef ASSERT -+ { -+ Label L; -+ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); -+ __ addi(t0, zr, (u1)_thread_in_Java); -+ __ beq(t, t0, L); -+ __ stop("Wrong thread state in native stub"); -+ __ bind(L); -+ } -+#endif ++void TemplateTable::istore(int n) ++{ ++ transition(itos, vtos); ++ __ sd(x10, iaddress(n)); ++} + -+ // Change state to native -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ mv(t0, _thread_in_native); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++void TemplateTable::lstore(int n) ++{ ++ transition(ltos, vtos); ++ __ sd(x10, laddress(n)); ++} + -+ // Call the native method. 
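bastore above has one subtlety worth spelling out: byte[] and boolean[] share the same bytecode, so the array klass's layout helper is consulted and a boolean store is masked down to 0 or 1. A hedged sketch of the resulting value, where is_boolean_array stands in for the layout_helper_boolean_diffbit test:

#include <cstdint>

static inline int8_t bastore_value(int32_t value, bool is_boolean_array) {
    if (is_boolean_array) {
        value &= 1;                          // andi(x10, x10, 1) in the generated code
    }
    return static_cast<int8_t>(value);       // the byte store then truncates to 8 bits
}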
-+ __ jalr(x28); -+ __ bind(native_return); -+ __ get_method(xmethod); -+ // result potentially in x10 or f10 ++void TemplateTable::fstore(int n) ++{ ++ transition(ftos, vtos); ++ __ fsw(f10, faddress(n)); ++} + -+ // make room for the pushes we're about to do -+ __ sub(t0, esp, 4 * wordSize); -+ __ andi(sp, t0, -16); ++void TemplateTable::dstore(int n) ++{ ++ transition(dtos, vtos); ++ __ fsd(f10, daddress(n)); ++} + -+ // NOTE: The order of these pushes is known to frame::interpreter_frame_result -+ // in order to extract the result of a method call. If the order of these -+ // pushes change or anything else is added to the stack then the code in -+ // interpreter_frame_result must also change. -+ __ push(dtos); -+ __ push(ltos); ++void TemplateTable::astore(int n) ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ __ sd(x10, iaddress(n)); ++} + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++void TemplateTable::pop() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, Interpreter::stackElementSize); ++} + -+ __ mv(t0, _thread_in_native_trans); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++void TemplateTable::pop2() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); ++void TemplateTable::dup() ++{ ++ transition(vtos, vtos); ++ __ ld(x10, Address(esp, 0)); ++ __ push_reg(x10); ++ // stack: ..., a, a ++} + -+ // check for safepoint operation in progress and/or pending suspend requests -+ { -+ Label L, Continue; ++void TemplateTable::dup_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos()); // load b ++ __ ld(x12, at_tos_p1()); // load a ++ __ sd(x10, at_tos_p1()); // store b ++ __ sd(x12, at_tos()); // store a ++ __ push_reg(x10); // push b ++ // stack: ..., b, a, b ++} + -+ // We need an acquire here to ensure that any subsequent load of the -+ // global SafepointSynchronize::_state flag is ordered after this load -+ // of the thread-local polling word. We don't want this poll to -+ // return false (i.e. not safepointing) and a later poll of the global -+ // SafepointSynchronize::_state spuriously to return true. -+ // -+ // This is to avoid a race when we're in a native->Java transition -+ // racing the code which wakes up from a safepoint. -+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); -+ __ beqz(t1, Continue); -+ __ bind(L); ++void TemplateTable::dup_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x10, at_tos()); // load c ++ __ ld(x12, at_tos_p2()); // load a ++ __ sd(x10, at_tos_p2()); // store c in a ++ __ push_reg(x10); // push c ++ // stack: ..., c, b, c, c ++ __ ld(x10, at_tos_p2()); // load b ++ __ sd(x12, at_tos_p2()); // store a in b ++ // stack: ..., c, a, c, c ++ __ sd(x10, at_tos_p1()); // store b in c ++ // stack: ..., c, a, b, c ++} + -+ // Don't use call_VM as it will see a possible pending exception -+ // and forward it and never return here preventing us from -+ // clearing _last_native_pc down below. So we do a runtime call by -+ // hand. 
-+ // -+ __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); -+ __ jalr(t1); -+ __ get_method(xmethod); -+ __ reinit_heapbase(); -+ __ bind(Continue); -+ } ++void TemplateTable::dup2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos_p1()); // load a ++ __ push_reg(x10); // push a ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ // stack: ..., a, b, a, b ++} + -+ // change thread state -+ // Force all preceding writes to be observed prior to thread state change -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++void TemplateTable::dup2_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x12, at_tos()); // load c ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ __ push_reg(x12); // push c ++ // stack: ..., a, b, c, b, c ++ __ sd(x12, at_tos_p3()); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ ld(x12, at_tos_p4()); // load a ++ __ sd(x12, at_tos_p2()); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ sd(x10, at_tos_p4()); // store b in a ++ // stack: ..., b, c, a, b, c ++} + -+ __ mv(t0, _thread_in_Java); -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++void TemplateTable::dup2_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ __ ld(x12, at_tos()); // load d ++ __ ld(x10, at_tos_p1()); // load c ++ __ push_reg(x10); // push c ++ __ push_reg(x12); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ ld(x10, at_tos_p4()); // load b ++ __ sd(x10, at_tos_p2()); // store b in d ++ __ sd(x12, at_tos_p4()); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ ld(x12, at_tos_p5()); // load a ++ __ ld(x10, at_tos_p3()); // load c ++ __ sd(x12, at_tos_p3()); // store a in c ++ __ sd(x10, at_tos_p5()); // store c in a ++ // stack: ..., c, d, a, b, c, d ++} + -+ // reset_last_Java_frame -+ __ reset_last_Java_frame(true); ++void TemplateTable::swap() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x12, at_tos_p1()); // load a ++ __ ld(x10, at_tos()); // load b ++ __ sd(x12, at_tos()); // store a in b ++ __ sd(x10, at_tos_p1()); // store b in a ++ // stack: ..., b, a ++} + -+ if (CheckJNICalls) { -+ // clear_pending_jni_exception_check -+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++void TemplateTable::iop2(Operation op) ++{ ++ transition(itos, itos); ++ // x10 <== x11 op x10 ++ __ pop_i(x11); ++ switch (op) { ++ case add : __ addw(x10, x11, x10); break; ++ case sub : __ subw(x10, x11, x10); break; ++ case mul : __ mulw(x10, x11, x10); break; ++ case _and : __ andrw(x10, x11, x10); break; ++ case _or : __ orrw(x10, x11, x10); break; ++ case _xor : __ xorrw(x10, x11, x10); break; ++ case shl : __ sllw(x10, x11, x10); break; ++ case shr : __ sraw(x10, x11, x10); break; ++ case ushr : __ srlw(x10, x11, x10); break; ++ default : ShouldNotReachHere(); + } ++} + -+ // reset handle block -+ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); -+ -+ // If result is an oop unbox and store it in frame where gc will see it -+ // and result handler will pick it up -+ -+ { -+ Label no_oop; -+ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); -+ __ bne(t, result_handler, no_oop); -+ // Unbox oop result, e.g. JNIHandles::resolve result. 
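The dup/swap templates above are easiest to read against a slot-level model of the expression stack. For example, dup2_x1 turns ..., a, b, c into ..., b, c, a, b, c; a small C++ sketch using std::vector as a stand-in for the operand stack:

#include <cassert>
#include <vector>

// dup2_x1 (category-1 view): ..., a, b, c  =>  ..., b, c, a, b, c
static void dup2_x1(std::vector<long>& stack) {
    assert(stack.size() >= 3);
    long c = stack[stack.size() - 1];
    long b = stack[stack.size() - 2];
    long a = stack[stack.size() - 3];
    stack[stack.size() - 3] = b;
    stack[stack.size() - 2] = c;
    stack[stack.size() - 1] = a;
    stack.push_back(b);
    stack.push_back(c);
}

The generated code reaches the same final layout with loads and stores at fixed offsets from esp plus two push_reg calls, rather than shuffling the slots one by one.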
-+ __ pop(ltos); -+ __ resolve_jobject(x10, xthread, t); -+ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); -+ // keep stack depth as expected by pushing oop which will eventually be discarded -+ __ push(ltos); -+ __ bind(no_oop); ++void TemplateTable::lop2(Operation op) ++{ ++ transition(ltos, ltos); ++ // x10 <== x11 op x10 ++ __ pop_l(x11); ++ switch (op) { ++ case add : __ add(x10, x11, x10); break; ++ case sub : __ sub(x10, x11, x10); break; ++ case mul : __ mul(x10, x11, x10); break; ++ case _and : __ andr(x10, x11, x10); break; ++ case _or : __ orr(x10, x11, x10); break; ++ case _xor : __ xorr(x10, x11, x10); break; ++ default : ShouldNotReachHere(); + } ++} + -+ { -+ Label no_reguard; -+ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ bne(t0, t1, no_reguard); ++void TemplateTable::idiv() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 idiv x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); ++} + -+ __ pusha(); // only save smashed registers -+ __ mv(c_rarg0, xthread); -+ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); -+ __ jalr(t1); -+ __ popa(); // only restore smashed registers -+ __ bind(no_reguard); -+ } ++void TemplateTable::irem() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 irem x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); ++} + -+ // The method register is junk from after the thread_in_native transition -+ // until here. Also can't call_VM until the bcp has been -+ // restored. Need bcp for throwing exception below so get it now. -+ __ get_method(xmethod); ++void TemplateTable::lmul() ++{ ++ transition(ltos, ltos); ++ __ pop_l(x11); ++ __ mul(x10, x10, x11); ++} + -+ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> -+ // xbcp == code_base() -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase -+ // handle exceptions (exception handling will handle unlocking!) -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // Note: At some point we may want to unify this with the code -+ // used in call_VM_base(); i.e., we should use the -+ // StubRoutines::forward_exception code. For now this doesn't work -+ // here because the sp is not correctly set at this point. 
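idiv/irem above branch to the ArithmeticException entry explicitly because RISC-V integer division does not trap on a zero divisor; corrected_idivl then supplies the Java result. For reference, the Java-level semantics the sequence is expected to produce (a sketch of the contract, not of the helper's implementation):

#include <climits>
#include <stdexcept>

static int java_idiv(int dividend, int divisor) {
    if (divisor == 0) {
        throw std::runtime_error("ArithmeticException: / by zero");
    }
    if (dividend == INT_MIN && divisor == -1) {
        return INT_MIN;                  // wraps, per JLS 15.17.2; avoids C++ overflow UB
    }
    return dividend / divisor;           // both C++ and Java truncate toward zero
}

static int java_irem(int dividend, int divisor) {
    if (divisor == 0) {
        throw std::runtime_error("ArithmeticException: % by zero");
    }
    if (dividend == INT_MIN && divisor == -1) {
        return 0;
    }
    return dividend % divisor;
}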
-+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_pending_exception)); -+ __ should_not_reach_here(); -+ __ bind(L); -+ } ++void TemplateTable::ldiv() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 ldiv x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); ++} + -+ // do unlocking if necessary -+ { -+ Label L; -+ __ lwu(t, Address(xmethod, Method::access_flags_offset())); -+ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); -+ __ beqz(t0, L); -+ // the code below should be shared with interpreter macro -+ // assembler implementation -+ { -+ Label unlock; -+ // BasicObjectLock will be first in list, since this is a -+ // synchronized method. However, need to check that the object -+ // has not been unlocked by an explicit monitorexit bytecode. ++void TemplateTable::lrem() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 lrem x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); ++} + -+ // monitor expect in c_rarg1 for slow unlock path -+ __ la(c_rarg1, Address(fp, // address of first monitor -+ (intptr_t)(frame::interpreter_frame_initial_sp_offset * -+ wordSize - sizeof(BasicObjectLock)))); ++void TemplateTable::lshl() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sll(x10, x11, x10); ++} + -+ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t, unlock); ++void TemplateTable::lshr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sra(x10, x11, x10); ++} + -+ // Entry already unlocked, need to throw exception -+ __ MacroAssembler::call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); ++void TemplateTable::lushr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ srl(x10, x11, x10); ++} + -+ __ bind(unlock); -+ __ unlock_object(c_rarg1); -+ } -+ __ bind(L); ++void TemplateTable::fop2(Operation op) ++{ ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ pop_f(f11); ++ __ fadd_s(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_f(f11); ++ __ fsub_s(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_f(f11); ++ __ fmul_s(f10, f11, f10); ++ break; ++ case div: ++ __ pop_f(f11); ++ __ fdiv_s(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_s(f11, f10); ++ __ pop_f(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); ++ break; ++ default: ++ ShouldNotReachHere(); + } ++} + -+ // jvmti support -+ // Note: This must happen _after_ handling/throwing any exceptions since -+ // the exception handler code notifies the runtime of method exits -+ // too. If this happens before, method entry/exit notifications are -+ // not properly paired (was bug - gri 11/22/99). 
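lshl/lshr/lushr above can issue a bare sll/sra/srl because RV64 register shifts already use only the low six bits of the count register, which lines up with Java's masking of long shift counts to count & 63. Reference semantics in C++ (the unsigned casts just sidestep C++'s stricter rules on shifting signed values):

#include <cstdint>

static int64_t java_lshl(int64_t value, int32_t count) {
    return static_cast<int64_t>(static_cast<uint64_t>(value) << (count & 63));
}
static int64_t java_lshr(int64_t value, int32_t count) {
    return value >> (count & 63);        // arithmetic shift (what sra produces)
}
static int64_t java_lushr(int64_t value, int32_t count) {
    return static_cast<int64_t>(static_cast<uint64_t>(value) >> (count & 63));   // logical shift (srl)
}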
-+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); -+ -+ __ pop(ltos); -+ __ pop(dtos); -+ -+ __ jalr(result_handler); -+ -+ // remove activation -+ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp -+ // remove frame anchor -+ __ leave(); -+ -+ // restore sender sp -+ __ mv(sp, esp); -+ -+ __ ret(); -+ -+ if (inc_counter) { -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); ++void TemplateTable::dop2(Operation op) ++{ ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ pop_d(f11); ++ __ fadd_d(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_d(f11); ++ __ fsub_d(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_d(f11); ++ __ fmul_d(f10, f11, f10); ++ break; ++ case div: ++ __ pop_d(f11); ++ __ fdiv_d(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_d(f11, f10); ++ __ pop_d(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ -+ return entry_point; +} + -+// -+// Generic interpreted method entry to (asm) interpreter -+// -+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { -+ -+ // determine code generation flags -+ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; -+ -+ // t0: sender sp -+ address entry_point = __ pc(); -+ -+ const Address constMethod(xmethod, Method::const_offset()); -+ const Address access_flags(xmethod, Method::access_flags_offset()); -+ const Address size_of_parameters(x13, -+ ConstMethod::size_of_parameters_offset()); -+ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); -+ -+ // get parameter size (always needed) -+ // need to load the const method first -+ __ ld(x13, constMethod); -+ __ load_unsigned_short(x12, size_of_parameters); -+ -+ // x12: size of parameters -+ -+ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words -+ __ sub(x13, x13, x12); // x13 = no. of additional locals -+ -+ // see if we've got enough room on the stack for locals plus overhead. -+ generate_stack_overflow_check(); -+ -+ // compute beginning of parameters (xlocals) -+ __ shadd(xlocals, x12, esp, t1, 3); -+ __ add(xlocals, xlocals, -wordSize); ++void TemplateTable::ineg() ++{ ++ transition(itos, itos); ++ __ negw(x10, x10); ++} + -+ // Make room for additional locals -+ __ slli(t1, x13, 3); -+ __ sub(t0, esp, t1); ++void TemplateTable::lneg() ++{ ++ transition(ltos, ltos); ++ __ neg(x10, x10); ++} + -+ // Padding between locals and fixed part of activation frame to ensure -+ // SP is always 16-byte aligned. 
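The rem cases of fop2/dop2 above go through the runtime (SharedRuntime::frem / SharedRuntime::drem) since RISC-V has no floating-point remainder instruction. Java's % on floating-point values follows the truncating, fmod-style definition (the result carries the dividend's sign), not IEEE-754 remainder; a sketch of that contract:

#include <cmath>

static double java_drem(double dividend, double divisor) {
    return std::fmod(dividend, divisor);   // e.g. java_drem(5.5, 2.0) == 1.5, java_drem(-5.5, 2.0) == -1.5
}
static float java_frem(float dividend, float divisor) {
    return std::fmod(dividend, divisor);   // float overload of std::fmod
}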
-+ __ andi(sp, t0, -16); ++void TemplateTable::fneg() ++{ ++ transition(ftos, ftos); ++ __ fneg_s(f10, f10); ++} + -+ // x13 - # of additional locals -+ // allocate space for locals -+ // explicitly initialize locals -+ { -+ Label exit, loop; -+ __ blez(x13, exit); // do nothing if x13 <= 0 -+ __ bind(loop); -+ __ sd(zr, Address(t0)); -+ __ add(t0, t0, wordSize); -+ __ add(x13, x13, -1); // until everything initialized -+ __ bnez(x13, loop); -+ __ bind(exit); -+ } ++void TemplateTable::dneg() ++{ ++ transition(dtos, dtos); ++ __ fneg_d(f10, f10); ++} + -+ // And the base dispatch table -+ __ get_dispatch(); ++void TemplateTable::iinc() ++{ ++ transition(vtos, vtos); ++ __ load_signed_byte(x11, at_bcp(2)); // get constant ++ locals_index(x12); ++ __ ld(x10, iaddress(x12, x10, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ // initialize fixed part of activation frame -+ generate_fixed_frame(false); ++void TemplateTable::wide_iinc() ++{ ++ transition(vtos, vtos); ++ __ lwu(x11, at_bcp(2)); // get constant and index ++ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend ++ __ zero_extend(x12, x11, 16); ++ __ neg(x12, x12); ++ __ slli(x11, x11, 32); ++ __ srai(x11, x11, 48); ++ __ ld(x10, iaddress(x12, t0, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} + -+ // make sure method is not native & not abstract ++void TemplateTable::convert() ++{ ++ // Checking +#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); -+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); -+#endif -+ -+ // Since at this point in the method invocation the exception -+ // handler would try to exit the monitor of synchronized methods -+ // which hasn't been entered yet, we set the thread local variable -+ // _do_not_unlock_if_synchronized to true. The remove_activation -+ // will check this flag. 
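The revb_* shuffling in iinc/wide_iinc above exists because bytecode operands are stored big-endian while this target is little-endian: wide_iinc extracts a 16-bit unsigned local index and a 16-bit signed constant from a single 32-bit load. A byte-level decode for reference, assuming bcp points at the wide opcode just as xbcp does in the template:

#include <cstdint>

struct WideIinc { uint16_t local_index; int16_t increment; };

// Layout per the JVMS: wide, iinc, indexbyte1, indexbyte2, constbyte1, constbyte2
static WideIinc decode_wide_iinc(const uint8_t* bcp) {
    WideIinc w;
    w.local_index = static_cast<uint16_t>((bcp[2] << 8) | bcp[3]);   // big-endian, unsigned
    w.increment   = static_cast<int16_t>((bcp[4] << 8) | bcp[5]);    // big-endian, sign-extended
    return w;
}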
-+ -+ const Address do_not_unlock_if_synchronized(xthread, -+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -+ __ mv(t1, true); -+ __ sb(t1, do_not_unlock_if_synchronized); -+ -+ Label no_mdp; -+ const Register mdp = x13; -+ __ ld(mdp, Address(xmethod, Method::method_data_offset())); -+ __ beqz(mdp, no_mdp); -+ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); -+ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers -+ __ bind(no_mdp); -+ -+ // increment invocation count & check for overflow -+ Label invocation_counter_overflow; -+ if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow); ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); + } ++#endif // ASSERT + -+ Label continue_after_compile; -+ __ bind(continue_after_compile); -+ -+ bang_stack_shadow_pages(false); ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sign_extend(x10, x10, 32); ++ break; ++ case Bytecodes::_i2f: ++ __ fcvt_s_w(f10, x10); ++ break; ++ case Bytecodes::_i2d: ++ __ fcvt_d_w(f10, x10); ++ break; ++ case Bytecodes::_i2b: ++ __ sign_extend(x10, x10, 8); ++ break; ++ case Bytecodes::_i2c: ++ __ zero_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_i2s: ++ __ sign_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_l2i: ++ __ addw(x10, x10, zr); ++ break; ++ case Bytecodes::_l2f: ++ __ fcvt_s_l(f10, x10); ++ break; ++ case Bytecodes::_l2d: ++ __ fcvt_d_l(f10, x10); ++ break; ++ case Bytecodes::_f2i: ++ __ fcvt_w_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2l: ++ __ fcvt_l_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(f10, f10); ++ break; ++ case Bytecodes::_d2i: ++ __ fcvt_w_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2l: ++ __ fcvt_l_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(f10, f10); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} + -+ // reset the _do_not_unlock_if_synchronized flag -+ __ sb(zr, do_not_unlock_if_synchronized); ++void TemplateTable::lcmp() ++{ ++ transition(ltos, itos); ++ __ pop_l(x11); ++ __ cmp_l2i(t0, x11, 
x10); ++ __ mv(x10, t0); ++} + -+ // check for synchronized methods -+ // Must happen AFTER invocation_counter check and stack overflow check, -+ // so method is not locked if overflows. -+ if (synchronized) { -+ // Allocate monitor and lock method -+ lock_method(); ++void TemplateTable::float_cmp(bool is_float, int unordered_result) ++{ ++ // For instruction feq, flt and fle, the result is 0 if either operand is NaN ++ if (is_float) { ++ __ pop_f(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ float_compare(x10, f11, f10, unordered_result); + } else { -+ // no synchronization necessary -+#ifdef ASSERT -+ __ lwu(x10, access_flags); -+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); -+#endif ++ __ pop_d(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ double_compare(x10, f11, f10, unordered_result); + } ++} + -+ // start execution -+#ifdef ASSERT -+ __ verify_frame_setup(); -+#endif -+ -+ // jvmti support -+ __ notify_method_entry(); ++void TemplateTable::branch(bool is_jsr, bool is_wide) ++{ ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ __ dispatch_next(vtos); ++ __ profile_taken_branch(x10, x11); ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); + -+ // invocation counter overflow -+ if (inc_counter) { -+ // Handle overflow of counter and compile method -+ __ bind(invocation_counter_overflow); -+ generate_counter_overflow(continue_after_compile); ++ // load branch displacement ++ if (!is_wide) { ++ __ lhu(x12, at_bcp(1)); ++ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend ++ } else { ++ __ lwu(x12, at_bcp(1)); ++ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend + } + -+ return entry_point; -+} -+ -+//----------------------------------------------------------------------------- -+// Exceptions -+ -+void TemplateInterpreterGenerator::generate_throw_exception() { -+ // Entry point in previous activation (i.e., if the caller was -+ // interpreted) -+ Interpreter::_rethrow_exception_entry = __ pc(); -+ // Restore sp to interpreter_frame_last_sp even though we are going -+ // to empty the expression stack for the exception processing. -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ // x10: exception -+ // x13: return address/pc that threw exception -+ __ restore_bcp(); // xbcp points to call/send -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ reinit_heapbase(); // restore xheapbase as heapbase. 
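lcmp and float_cmp above all funnel into the same -1/0/1 convention; the unordered_result parameter is what distinguishes fcmpl/dcmpl (a NaN operand yields -1) from fcmpg/dcmpg (a NaN operand yields +1). A compact C++ statement of that contract, with value1 being the popped operand (x11/f11) and value2 the tos value (x10/f10):

#include <cmath>
#include <cstdint>

static int cmp_l2i(int64_t value1, int64_t value2) {
    return (value1 < value2) ? -1 : (value1 == value2 ? 0 : 1);
}

static int float_cmp_result(float value1, float value2, int unordered_result) {
    if (std::isnan(value1) || std::isnan(value2)) {
        return (unordered_result < 0) ? -1 : 1;   // fcmpl vs. fcmpg behaviour
    }
    return (value1 < value2) ? -1 : (value1 == value2 ? 0 : 1);
}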
-+ __ get_dispatch(); -+ -+ // Entry point for exceptions thrown within interpreter code -+ Interpreter::_throw_exception_entry = __ pc(); -+ // If we came here via a NullPointerException on the receiver of a -+ // method, xthread may be corrupt. -+ __ get_method(xmethod); -+ // expression stack is undefined here -+ // x10: exception -+ // xbcp: exception bcp -+ __ verify_oop(x10); -+ __ mv(c_rarg1, x10); ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occurring below. + -+ // expression stack must be empty before entering the VM in case of -+ // an exception -+ __ empty_expression_stack(); -+ // find exception handler address and preserve exception oop -+ __ call_VM(x13, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::exception_handler_for_exception), -+ c_rarg1); ++ if (is_jsr) { ++ // compute return address as bci ++ __ ld(t1, Address(xmethod, Method::const_offset())); ++ __ add(t1, t1, ++ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); ++ __ sub(x11, xbcp, t1); ++ __ push_i(x11); ++ // Adjust the bcp by the 16-bit displacement in x12 ++ __ add(xbcp, xbcp, x12); ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // load the next target bytecode into t0, it is the argument of dispatch_only ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ return; ++ } + -+ // Calculate stack limit -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slli(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); ++ // Normal (non-jsr) branch handling + -+ // x10: exception handler entry point -+ // x13: preserved exception oop -+ // xbcp: bcp for exception handler -+ __ push_ptr(x13); // push exception which is now the only value on the stack -+ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) ++ // Adjust the bcp by the displacement in x12 ++ __ add(xbcp, xbcp, x12); + -+ // If the exception is not handled in the current frame the frame is -+ // removed and the exception is rethrown (i.e. exception -+ // continuation is _rethrow_exception). -+ // -+ // Note: At this point the bci is still the bxi for the instruction -+ // which caused the exception and the expression stack is -+ // empty. Thus, for any VM calls at this point, GC will find a legal -+ // oop map (with empty expression stack). 
++ assert(UseLoopCounter || !UseOnStackReplacement, ++ "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // x10: MDO ++ // x11: MDO bumped taken-count ++ // x12: target offset ++ __ bgtz(x12, dispatch); // count only if backward branch + -+ // -+ // JVMTI PopFrame support -+ // ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ bnez(t0, has_counters); ++ __ push_reg(x10); ++ __ push_reg(x11); ++ __ push_reg(x12); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), xmethod); ++ __ pop_reg(x12); ++ __ pop_reg(x11); ++ __ pop_reg(x10); ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory ++ __ bind(has_counters); + -+ Interpreter::_remove_activation_preserving_args_entry = __ pc(); -+ __ empty_expression_stack(); -+ // Set the popframe_processing bit in pending_popframe_condition -+ // indicating that we are currently handling popframe, so that -+ // call_VMs that may happen later do not trigger new popframe -+ // handling cycles. -+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); -+ __ ori(x13, x13, JavaThread::popframe_processing_bit); -+ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter + -+ { -+ // Check to see whether we are returning to a deoptimized frame. -+ // (The PopFrame call ensures that the caller of the popped frame is -+ // either interpreted or compiled and deoptimizes it if compiled.) -+ // In this case, we can't call dispatch_next() after the frame is -+ // popped, but instead must save the incoming arguments and restore -+ // them after deoptimization has occurred. -+ // -+ // Note that we don't compare the return PC against the -+ // deoptimization blob's unpack entry because of the presence of -+ // adapter frames in C2. 
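The loop-counter block above only counts backward branches (the bgtz(x12, dispatch) skips forward ones) and relies on increment_mask_and_jump, which bumps the counter and takes the overflow path when the masked result reaches zero. A rough sketch of that gating; the real mask value encodes the compile/OSR notification threshold and is loaded from MethodData/MethodCounters:

#include <cstdint>

// Backward branches have a non-positive displacement; forward branches bypass
// the counter entirely.
static bool counts_toward_osr(int32_t branch_displacement) {
    return branch_displacement <= 0;
}

// Roughly what increment_mask_and_jump does: bump the counter, then take the
// overflow path whenever the masked bits wrap to zero.
static bool bump_backedge_counter(uint32_t& counter, uint32_t increment, uint32_t mask) {
    counter += increment;
    return (counter & mask) == 0;   // true => branch to backedge_counter_overflow
}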
-+ Label caller_not_deoptimized; -+ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); -+ __ bnez(x10, caller_not_deoptimized); ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters + -+ // Compute size of arguments for saving when returning to -+ // deoptimized caller -+ __ get_method(x10); -+ __ ld(x10, Address(x10, Method::const_offset())); -+ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: -+ size_of_parameters_offset()))); -+ __ slli(x10, x10, Interpreter::logStackElementSize); -+ __ restore_locals(); -+ __ sub(xlocals, xlocals, x10); -+ __ add(xlocals, xlocals, wordSize); -+ // Save these arguments -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ Deoptimization:: -+ popframe_preserve_args), -+ xthread, x10, xlocals); ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); + -+ // Inform deoptimization that it is responsible for restoring -+ // these arguments -+ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); -+ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower + -+ // Continue in deoptimization handler -+ __ ret(); ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. 
++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); + -+ __ bind(caller_not_deoptimized); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } ++ } ++ __ bind(dispatch); + } ++ // Pre-load the next target bytecode into t0 ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); + -+ __ remove_activation(vtos, -+ /* throw_monitor_exception */ false, -+ /* install_monitor_exception */ false, -+ /* notify_jvmdi */ false); -+ -+ // Restore the last_sp and null it out -+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); -+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // continue with the bytecode @ target ++ // t0: target bytecode ++ // xbcp: target bcp ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ restore_constant_pool_cache(); -+ __ get_method(xmethod); -+ __ get_dispatch(); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); ++ } + -+ // The method data pointer was incremented already during -+ // call profiling. We have to restore the mdp for the current bcp. -+ if (ProfileInterpreter) { -+ __ set_method_data_pointer_for_bcp(); -+ } ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode + -+ // Clear the popframe condition flag -+ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); -+ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); + -+#if INCLUDE_JVMTI -+ { -+ Label L_done; ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. + -+ __ lbu(t0, Address(xbcp, 0)); -+ __ li(t1, Bytecodes::_invokestatic); -+ __ bne(t1, t0, L_done); ++ __ mv(x9, x10); // save the nmethod + -+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. -+ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. 
++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ __ ld(c_rarg0, Address(xlocals, 0)); -+ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ __ beqz(x10, L_done); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ __ sd(x10, Address(esp, 0)); -+ __ bind(L_done); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } -+#endif // INCLUDE_JVMTI -+ -+ // Restore machine SP -+ __ ld(t0, Address(xmethod, Method::const_offset())); -+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); -+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); -+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); -+ __ slliw(t0, t0, 3); -+ __ sub(t0, t1, t0); -+ __ andi(sp, t0, -16); -+ -+ __ dispatch_next(vtos); -+ // end of PopFrame support -+ -+ Interpreter::_remove_activation_entry = __ pc(); ++} + -+ // preserve exception over this code sequence -+ __ pop_ptr(x10); -+ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); -+ // remove the activation (without doing throws on illegalMonitorExceptions) -+ __ remove_activation(vtos, false, true, false); -+ // restore exception -+ __ get_vm_result(x10, xthread); ++void TemplateTable::if_0cmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; + -+ // In between activations - previous activation type unknown yet -+ // compute continuation point - the continuation point expects the -+ // following registers set up: -+ // -+ // x10: exception -+ // ra: return address/pc that threw exception -+ // sp: expression stack of caller -+ // fp: fp of caller -+ // FIXME: There's no point saving ra here because VM calls don't trash it -+ __ sub(sp, sp, 2 * wordSize); -+ __ sd(x10, Address(sp, 0)); // save exception -+ __ sd(ra, Address(sp, wordSize)); // save return address -+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, -+ SharedRuntime::exception_handler_for_return_address), -+ xthread, ra); -+ __ mv(x11, x10); // save exception handler -+ __ ld(x10, Address(sp, 0)); // restore exception -+ __ ld(ra, Address(sp, wordSize)); // restore return address -+ __ add(sp, sp, 2 * wordSize); -+ // We might be returning to a deopt handler that expects x13 to -+ // contain the exception pc -+ __ mv(x13, ra); -+ // Note that an "issuing PC" is actually the next PC after the call -+ __ jr(x11); // jump to exception -+ // handler of caller -+} -+ -+// -+// JVMTI ForceEarlyReturn support -+// -+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { -+ address entry = __ pc(); -+ -+ __ restore_bcp(); -+ __ restore_locals(); -+ __ empty_expression_stack(); -+ __ load_earlyret_value(state); -+ -+ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); -+ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bnez(x10, not_taken); ++ break; ++ case not_equal: ++ __ beqz(x10, not_taken); ++ break; ++ case less: ++ __ bgez(x10, not_taken); ++ break; ++ case less_equal: ++ __ 
bgtz(x10, not_taken); ++ break; ++ case greater: ++ __ blez(x10, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ // Clear the earlyret state -+ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); -+ __ sd(zr, cond_addr); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ __ remove_activation(state, -+ false, /* throw_monitor_exception */ -+ false, /* install_monitor_exception */ -+ true); /* notify_jvmdi */ -+ __ ret(); ++void TemplateTable::if_icmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_i(x11); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bne(x11, x10, not_taken); ++ break; ++ case not_equal: ++ __ beq(x11, x10, not_taken); ++ break; ++ case less: ++ __ bge(x11, x10, not_taken); ++ break; ++ case less_equal: ++ __ bgt(x11, x10, not_taken); ++ break; ++ case greater: ++ __ ble(x11, x10, not_taken); ++ break; ++ case greater_equal: ++ __ blt(x11, x10, not_taken); ++ break; ++ default: ++ break; ++ } + -+ return entry; ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); +} -+// end of ForceEarlyReturn support -+ -+//----------------------------------------------------------------------------- -+// Helper for vtos entry point generation + -+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, -+ address& bep, -+ address& cep, -+ address& sep, -+ address& aep, -+ address& iep, -+ address& lep, -+ address& fep, -+ address& dep, -+ address& vep) { -+ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); -+ Label L; -+ aep = __ pc(); __ push_ptr(); __ j(L); -+ fep = __ pc(); __ push_f(); __ j(L); -+ dep = __ pc(); __ push_d(); __ j(L); -+ lep = __ pc(); __ push_l(); __ j(L); -+ bep = cep = sep = -+ iep = __ pc(); __ push_i(); -+ vep = __ pc(); -+ __ bind(L); -+ generate_and_dispatch(t); ++void TemplateTable::if_nullcmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ if (cc == equal) { ++ __ bnez(x10, not_taken); ++ } else { ++ __ beqz(x10, not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); +} + -+//----------------------------------------------------------------------------- ++void TemplateTable::if_acmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_ptr(x11); + -+// Non-product code -+#ifndef PRODUCT -+address TemplateInterpreterGenerator::generate_trace_code(TosState state) { -+ address entry = __ pc(); ++ if (cc == equal) { ++ __ bne(x11, x10, not_taken); ++ } else if (cc == not_equal) { ++ __ beq(x11, x10, not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} + -+ __ push_reg(ra); -+ __ push(state); -+ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ mv(c_rarg2, x10); // Pass itos -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); -+ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); -+ __ pop(state); -+ __ pop_reg(ra); -+ __ ret(); // return from result handler ++void TemplateTable::ret() { ++ transition(vtos, vtos); 
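The if_0cmp/if_icmp templates above test the inverted condition and jump to not_taken, so the fall-through path is the taken branch that calls branch(false, false). The condition itself, spelled out in C++ (value1 is the popped operand in x11, value2 the tos value in x10; if_0cmp is the same with value2 fixed to zero; the enum mirrors the template's Condition values):

#include <cstdint>

enum Condition { equal, not_equal, less, less_equal, greater, greater_equal };

static bool int_branch_taken(Condition cc, int32_t value1, int32_t value2) {
    switch (cc) {
    case equal:         return value1 == value2;
    case not_equal:     return value1 != value2;
    case less:          return value1 <  value2;   // template: bge(x11, x10, not_taken)
    case less_equal:    return value1 <= value2;   // template: bgt(x11, x10, not_taken)
    case greater:       return value1 >  value2;   // template: ble(x11, x10, not_taken)
    case greater_equal: return value1 >= value2;   // template: blt(x11, x10, not_taken)
    }
    return false;
}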
++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); + -+ return entry; ++ locals_index(x11); ++ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+void TemplateInterpreterGenerator::count_bytecode() { -+ __ push_reg(t0); -+ __ push_reg(x10); -+ __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ li(t0, 1); -+ __ amoadd_d(zr, x10, t0, Assembler::aqrl); -+ __ pop_reg(x10); -+ __ pop_reg(t0); ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ locals_index_wide(x11); ++ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + -+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } -+ -+void TemplateInterpreterGenerator::trace_bytecode(Template* t) { -+ // Call a little run-time stub to avoid blow-up for each bytecode. -+ // The run-time runtime saves the right registers, depending on -+ // the tosca in-state for the given template. ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ // align xbcp ++ __ la(x11, at_bcp(BytesPerInt)); ++ __ andi(x11, x11, -BytesPerInt); ++ // load lo & hi ++ __ lwu(x12, Address(x11, BytesPerInt)); ++ __ lwu(x13, Address(x11, 2 * BytesPerInt)); ++ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ // check against lo & hi ++ __ blt(x10, x12, default_case); ++ __ bgt(x10, x13, default_case); ++ // lookup dispatch offset ++ __ subw(x10, x10, x12); ++ __ shadd(x13, x10, x11, t0, 2); ++ __ lwu(x13, Address(x13, 3 * BytesPerInt)); ++ __ profile_switch_case(x10, x11, x12); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ load_unsigned_byte(t0, Address(xbcp)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x11, 0)); ++ __ j(continue_execution); ++} + -+ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); -+ __ jal(Interpreter::trace_code(t->tos_in())); -+ __ reinit_heapbase(); ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); +} + -+void TemplateInterpreterGenerator::stop_interpreter_at() { -+ Label L; -+ __ push_reg(t0); -+ __ mv(t0, (address) &BytecodeCounter::_counter_value); -+ __ ld(t0, Address(t0)); -+ __ mv(t1, StopInterpreterAt); -+ __ bne(t0, t1, L); -+ __ ebreak(); -+ __ bind(L); -+ __ pop_reg(t0); ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, 
continue_execution; ++ // bswap x10 so we can avoid bswapping the table entries ++ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend ++ // align xbcp ++ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of ++ // this instruction (change offsets ++ // below) ++ __ andi(x9, x9, -BytesPerInt); ++ // set counter ++ __ lwu(x11, Address(x9, BytesPerInt)); ++ __ revb_w(x11, x11); ++ __ j(loop_entry); ++ // table search ++ __ bind(loop); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lw(t0, Address(t0, 2 * BytesPerInt)); ++ __ beq(x10, t0, found); ++ __ bind(loop_entry); ++ __ addi(x11, x11, -1); ++ __ bgez(x11, loop); ++ // default case ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x9, 0)); ++ __ j(continue_execution); ++ // entry found -> get offset ++ __ bind(found); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lwu(x13, Address(t0, 3 * BytesPerInt)); ++ __ profile_switch_case(x11, x10, x9); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); +} + -+#endif // !PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -new file mode 100644 -index 00000000000..d2a301c6e74 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,3951 @@ -+/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) ++ // binary_search start: ++ // #Binary search according to "Methodik des Programmierens" by ++ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i + 1 < j) do ++ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // # with Q: for all i: 0 <= i < n: key < a[i] ++ // # where a stands for the array and assuming that the (inexisting) ++ // # element a[n] is infinitely big. 
++ // int h = (i + j) >> 1 ++ // # i < h < j ++ // if (key < array[h].fast_match()) ++ // then [j = h] ++ // else [i = h] ++ // end ++ // # R: a[i] <= key < a[i+1] or Q ++ // # (i.e., if key is within array, i is the correct index) ++ // return i ++ // binary_search end + -+#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" -+#include "gc/shared/collectedHeap.hpp" -+#include "gc/shared/tlab_globals.hpp" -+#include "interpreter/interp_masm.hpp" -+#include "interpreter/interpreter.hpp" -+#include "interpreter/interpreterRuntime.hpp" -+#include "interpreter/templateTable.hpp" -+#include "memory/universe.hpp" -+#include "oops/method.hpp" -+#include "oops/methodData.hpp" -+#include "oops/objArrayKlass.hpp" -+#include "oops/oop.inline.hpp" -+#include "prims/jvmtiExport.hpp" -+#include "prims/methodHandles.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/synchronizer.hpp" -+#include "utilities/powerOfTwo.hpp" + -+#define __ _masm-> ++ // Register allocation ++ const Register key = x10; // already set (tosca) ++ const Register array = x11; ++ const Register i = x12; ++ const Register j = x13; ++ const Register h = x14; ++ const Register temp = x15; + -+// Address computation: local variables ++ // Find array start ++ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to ++ // get rid of this ++ // instruction (change ++ // offsets below) ++ __ andi(array, array, -BytesPerInt); + -+static inline Address iaddress(int n) { -+ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); -+} ++ // Initialize i & j ++ __ mv(i, zr); // i = 0 ++ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) + -+static inline Address laddress(int n) { -+ return iaddress(n + 1); -+} ++ // Convert j into native byteordering ++ __ revb_w(j, j); + -+static inline Address faddress(int n) { -+ return iaddress(n); -+} ++ // And start ++ Label entry; ++ __ j(entry); + -+static inline Address daddress(int n) { -+ return laddress(n); -+} ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ __ addw(h, i, j); // h = i + j ++ __ srliw(h, h, 1); // h = (i + j) >> 1 ++ // if [key < array[h].fast_match()] ++ // then [j = h] ++ // else [i = h] ++ // Convert array[h].match to native byte-ordering before compare ++ __ shadd(temp, h, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend + -+static inline Address aaddress(int n) { -+ return iaddress(n); -+} ++ Label L_done, L_greater; ++ __ bge(key, temp, L_greater); ++ // if [key < array[h].fast_match()] then j = h ++ __ mv(j, h); ++ __ j(L_done); ++ __ bind(L_greater); ++ // if [key >= array[h].fast_match()] then i = h ++ __ mv(i, h); ++ __ bind(L_done); + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ assert_cond(_masm != NULL); -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, 0); -+} ++ // while [i + 1 < j] ++ __ bind(entry); ++ __ addiw(h, i, 1); // i + 1 ++ __ blt(h, j, loop); // i + 1 < j ++ } + -+static inline Address laddress(Register r, Register temp, -+ InterpreterMacroAssembler* _masm) { -+ assert_cond(_masm != NULL); -+ _masm->shadd(temp, r, xlocals, temp, 3); -+ return Address(temp, Interpreter::local_offset_in_bytes(1));; -+} ++ // end of binary search, result index is i (must check again!) 
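For reference, the search described in the comment above, including the final exact-match check that the code below performs, written as standalone C++. The real table entries live big-endian in the bytecode stream and are byte-swapped before each comparison; this sketch ignores byte order and uses an illustrative LookupswitchPair type:

#include <cstdint>

struct LookupswitchPair { int32_t match; int32_t offset; };

// pairs[] is the lookupswitch (match, offset) table, sorted by match, with n
// entries.  Returns the matching offset, or default_offset if key is absent.
static int32_t binary_search_offset(int32_t key,
                                    const LookupswitchPair* pairs, int n,
                                    int32_t default_offset) {
    if (n == 0) {
        return default_offset;
    }
    int i = 0;
    int j = n;
    while (i + 1 < j) {                      // invariant: pairs[i].match <= key < pairs[j].match
        int h = (i + j) >> 1;                // i < h < j   (pairs[n].match taken as +infinity)
        if (key < pairs[h].match) {
            j = h;
        } else {
            i = h;
        }
    }
    // i is the only candidate left; it still has to be checked for an exact hit.
    return (pairs[i].match == key) ? pairs[i].offset : default_offset;
}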
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ shadd(temp, i, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++ __ bne(key, temp, default_case); + -+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ // entry found -> j = offset ++ __ shadd(temp, i, array, temp, 3); ++ __ lwu(j, Address(temp, BytesPerInt)); ++ __ profile_switch_case(i, key, array); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+static inline Address daddress(Register r, Register temp, -+ InterpreterMacroAssembler* _masm) { -+ return laddress(r, temp, _masm); -+} ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { -+ return iaddress(r, temp, _masm); -+} ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); + -+static inline Address at_rsp() { -+ return Address(esp, 0); -+} ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lwu(j, Address(array, -2 * BytesPerInt)); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend + -+// At top of Java expression stack which may be different than esp(). It -+// isn't for category 1 objects. -+static inline Address at_tos () { -+ return Address(esp, Interpreter::expr_offset_in_bytes(0)); -+} ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); + -+static inline Address at_tos_p1() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); +} + -+static inline Address at_tos_p2() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(2)); -+} ++void TemplateTable::_return(TosState state) ++{ ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation + -+static inline Address at_tos_p3() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(3)); -+} ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); + -+static inline Address at_tos_p4() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(4)); -+} ++ __ ld(c_rarg1, aaddress(0)); ++ __ load_klass(x13, c_rarg1); ++ __ lwu(x13, Address(x13, Klass::access_flags_offset())); ++ Label skip_register_finalizer; ++ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); ++ __ beqz(t0, skip_register_finalizer); + -+static inline Address at_tos_p5() { -+ return Address(esp, Interpreter::expr_offset_in_bytes(5)); -+} ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); + -+// Miscelaneous helper routines -+// Store an oop (or NULL) at the Address described by obj. 
-+// If val == noreg this means store a NULL -+static void do_oop_store(InterpreterMacroAssembler* _masm, -+ Address dst, -+ Register val, -+ DecoratorSet decorators) { -+ assert(val == noreg || val == x10, "parameter is just for looks"); -+ assert_cond(_masm != NULL); -+ __ store_heap_oop(dst, val, x29, x11, decorators); -+} ++ __ bind(skip_register_finalizer); ++ } + -+static void do_oop_load(InterpreterMacroAssembler* _masm, -+ Address src, -+ Register dst, -+ DecoratorSet decorators) { -+ assert_cond(_masm != NULL); -+ __ load_heap_oop(dst, src, x7, x11, decorators); -+} ++ // Issue a StoreStore barrier after all stores but before return ++ // from any constructor for any class with a final field. We don't ++ // know if this is a finalizer, so we always do so. ++ if (_desc->bytecode() == Bytecodes::_return) { ++ __ membar(MacroAssembler::StoreStore); ++ } + -+Address TemplateTable::at_bcp(int offset) { -+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); -+ return Address(xbcp, offset); ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(x10); ++ } ++ ++ __ remove_activation(state); ++ __ ret(); +} + -+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, -+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, -+ int byte_no) -+{ -+ if (!RewriteBytecodes) { return; } -+ Label L_patch_done; + -+ switch (bc) { -+ case Bytecodes::_fast_aputfield: // fall through -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_dputfield: // fall through -+ case Bytecodes::_fast_fputfield: // fall through -+ case Bytecodes::_fast_iputfield: // fall through -+ case Bytecodes::_fast_lputfield: // fall through -+ case Bytecodes::_fast_sputfield: { -+ // We skip bytecode quickening for putfield instructions when -+ // the put_code written to the constant pool cache is zero. -+ // This is required so that every execution of this instruction -+ // calls out to InterpreterRuntime::resolve_get_put to do -+ // additional, required work. -+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); -+ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); -+ __ mv(bc_reg, bc); -+ __ beqz(temp_reg, L_patch_done); -+ break; -+ } -+ default: -+ assert(byte_no == -1, "sanity"); -+ // the pair bytecodes have already done the load. -+ if (load_bc_into_bc_reg) { -+ __ mv(bc_reg, bc); -+ } -+ } ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. 
It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. + -+ if (JvmtiExport::can_post_breakpoint()) { -+ Label L_fast_patch; -+ // if a breakpoint is present we can't rewrite the stream directly -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. -+ __ bnez(temp_reg, L_fast_patch); -+ // Let breakpoint table handling rewrite to quicker bytecode -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); -+ __ j(L_patch_done); -+ __ bind(L_fast_patch); -+ } ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ const Register temp = x9; ++ assert_different_registers(Rcache, index, temp); + -+#ifdef ASSERT -+ Label L_okay; -+ __ load_unsigned_byte(temp_reg, at_bcp(0)); -+ __ beq(temp_reg, bc_reg, L_okay); -+ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); -+ __ beqz(temp_reg, L_okay); -+ __ stop("patching the wrong bytecode"); -+ __ bind(L_okay); -+#endif ++ Label resolved; + -+ // patch bytecode -+ __ sb(bc_reg, at_bcp(0)); -+ __ bind(L_patch_done); -+} ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } + -+// Individual instructions ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ __ mv(t0, (int) code); ++ __ beq(temp, t0, resolved); + -+void TemplateTable::nop() { -+ transition(vtos, vtos); -+ // nothing to do -+} ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ __ mv(temp, (int) code); ++ __ call_VM(noreg, entry, temp); + -+void TemplateTable::shouldnotreachhere() { -+ transition(vtos, vtos); -+ __ stop("should not reach here bytecode"); ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ // n.b. 
unlike x86 Rcache is now rcpool plus the indexed offset ++ // so all clients ofthis method must be modified accordingly ++ __ bind(resolved); +} + -+void TemplateTable::aconst_null() -+{ -+ transition(vtos, atos); -+ __ mv(x10, zr); -+} ++// The Rcache and index registers must be set before call ++// n.b unlike x86 cache already includes the index offset ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); + -+void TemplateTable::iconst(int value) -+{ -+ transition(vtos, itos); -+ __ li(x10, value); -+} ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ ld(off, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f2_offset()))); ++ // Flags ++ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); + -+void TemplateTable::lconst(int value) -+{ -+ transition(vtos, ltos); -+ __ li(x10, value); ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f1_offset()))); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ __ resolve_oop_handle(obj); ++ } +} + -+void TemplateTable::fconst(int value) -+{ -+ transition(vtos, ftos); -+ static float fBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)fBuf); -+ switch (value) { -+ case 0: -+ __ fmv_w_x(f10, zr); -+ break; -+ case 1: -+ __ flw(f10, t0, 0); -+ break; -+ case 2: -+ __ flw(f10, t0, sizeof(float)); -+ break; -+ default: -+ ShouldNotReachHere(); ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = t1; ++ const Register index = x14; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ // determine constant pool cache field offsets ++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); ++ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + ++ (is_invokevirtual ? ++ ConstantPoolCacheEntry::f2_offset() : ++ ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ __ ld(method, Address(cache, method_offset)); ++ ++ if (itable_index != noreg) { ++ __ ld(itable_index, Address(cache, index_offset)); + } ++ __ lwu(flags, Address(cache, flags_offset)); +} + -+void TemplateTable::dconst(int value) -+{ -+ transition(vtos, dtos); -+ static double dBuf[2] = {1.0, 2.0}; -+ __ mv(t0, (intptr_t)dBuf); -+ switch (value) { -+ case 0: -+ __ fmv_d_x(f10, zr); -+ break; -+ case 1: -+ __ fld(f10, t0, 0); -+ break; -+ case 2: -+ __ fld(f10, t0, sizeof(double)); -+ break; -+ default: -+ ShouldNotReachHere(); ++// The registers cache and index expected to be set before call. 
++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here beacause we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ ++ __ beqz(x10, L1); ++ ++ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); ++ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); ++ ++ if (is_static) { ++ __ mv(c_rarg1, zr); // NULL object reference ++ } else { ++ __ ld(c_rarg1, at_tos()); // get object pointer without popping it ++ __ verify_oop(c_rarg1); ++ } ++ // c_rarg1: object pointer or NULL ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); + } +} + -+void TemplateTable::bipush() ++void TemplateTable::pop_and_check_object(Register r) +{ -+ transition(vtos, itos); -+ __ load_signed_byte(x10, at_bcp(1)); ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); +} + -+void TemplateTable::sipush() ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) +{ -+ transition(vtos, itos); -+ __ load_unsigned_short(x10, at_bcp(1)); -+ __ revb_w_w(x10, x10); -+ __ sraiw(x10, x10, 16); -+} ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x14; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register raw_flags = x16; ++ const Register bc = x14; // uses same reg as obj, so don't mix them + -+void TemplateTable::ldc(bool wide) -+{ -+ transition(vtos, vtos); -+ Label call_ldc, notFloat, notClass, notInt, Done; ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); + -+ if (wide) { -+ __ get_unsigned_2_byte_index_at_bcp(x11, 1); -+ } else { -+ __ load_unsigned_byte(x11, at_bcp(1)); ++ if (!is_static) { ++ // obj is on the stack ++ pop_and_check_object(obj); + } -+ __ get_cpool_and_tags(x12, x10); + -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add(off, obj, off); ++ const Address field(off); + -+ // get type -+ __ addi(x13, x11, tags_offset); -+ __ add(x13, x10, x13); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x13, Address(x13, 0)); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+ // unresolved class - get the resolved class -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); -+ __ beq(x13, t1, call_ldc); ++ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+ // 
unresolved class in error state - call into runtime to throw the error -+ // from the first resolution attempt -+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); -+ __ beq(x13, t1, call_ldc); ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); + -+ // resolved class - need to call vm to get java mirror of the class -+ __ mv(t1, (u1)JVM_CONSTANT_Class); -+ __ bne(x13, t1, notClass); ++ // Dont't rewrite getstatic, only getfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } + -+ __ bind(call_ldc); -+ __ mv(c_rarg1, wide); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); -+ __ push_ptr(x10); -+ __ verify_oop(x10); ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ __ push(btos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); ++ } + __ j(Done); + -+ __ bind(notClass); -+ __ mv(t1, (u1)JVM_CONSTANT_Float); -+ __ bne(x13, t1, notFloat); ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); + -+ // ftos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ flw(f10, Address(x11, base_offset)); -+ __ push_f(f10); ++ // ztos (same code as btos) ++ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ztos); ++ // Rewirte bytecode to be faster ++ if (rc == may_rewrite) { ++ // uses btos rewriting, no truncating to t/f bit is needed for getfield ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); ++ } + __ j(Done); + -+ __ bind(notFloat); -+ -+ __ mv(t1, (u1)JVM_CONSTANT_Integer); -+ __ bne(x13, t1, notInt); ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); ++ // atos ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ push(atos); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); ++ } ++ __ j(Done); + ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); + // itos -+ __ shadd(x11, x11, x12, x11, 3); -+ __ lw(x10, Address(x11, base_offset)); -+ __ push_i(x10); ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ __ push(itos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); ++ } + __ j(Done); + + __ bind(notInt); -+ condy_helper(Done); -+ -+ __ bind(Done); -+} ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ctos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); ++ } ++ __ j(Done); + -+// Fast path for caching oop constants. 
-+void TemplateTable::fast_aldc(bool wide) -+{ -+ transition(vtos, atos); ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ __ push(stos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); ++ } ++ __ j(Done); + -+ const Register result = x10; -+ const Register tmp = x11; -+ const Register rarg = x12; ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ltos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); ++ } ++ __ j(Done); + -+ const int index_size = wide ? sizeof(u2) : sizeof(u1); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); ++ } ++ __ j(Done); + -+ Label resolved; ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(dtos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); ++ } ++#ifdef ASSERT ++ __ j(Done); + -+ // We are resolved if the resolved reference cache entry contains a -+ // non-null object (String, MethodType, etc.) -+ assert_different_registers(result, tmp); -+ __ get_cache_index_at_bcp(tmp, 1, index_size); -+ __ load_resolved_reference_at_index(result, tmp); -+ __ bnez(result, resolved); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ __ bind(Done); + -+ // first time invocation - must resolve first -+ __ mv(rarg, (int)bytecode()); -+ __ call_VM(result, entry, rarg); ++ Label notVolatile; ++ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++} + -+ __ bind(resolved); ++void TemplateTable::getfield(int byte_no) ++{ ++ getfield_or_static(byte_no, false); ++} + -+ { // Check for the null sentinel. -+ // If we just called the VM, it already did the mapping for us, -+ // but it's harmless to retry. 
-+ Label notNull; -+ -+ // Stash null_sentinel address to get its value later -+ int32_t offset = 0; -+ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); -+ __ ld(tmp, Address(rarg, offset)); -+ __ resolve_oop_handle(tmp); -+ __ bne(result, tmp, notNull); -+ __ mv(result, zr); // NULL object reference -+ __ bind(notNull); -+ } -+ -+ if (VerifyOops) { -+ // Safe to call with 0 result -+ __ verify_oop(result); -+ } ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); +} + -+void TemplateTable::ldc2_w() ++void TemplateTable::getstatic(int byte_no) +{ -+ transition(vtos, vtos); -+ Label notDouble, notLong, Done; -+ __ get_unsigned_2_byte_index_at_bcp(x10, 1); -+ -+ __ get_cpool_and_tags(x11, x12); -+ const int base_offset = ConstantPool::header_size() * wordSize; -+ const int tags_offset = Array::base_offset_in_bytes(); ++ getfield_or_static(byte_no, true); ++} + -+ // get type -+ __ add(x12, x12, x10); -+ __ load_unsigned_byte(x12, Address(x12, tags_offset)); -+ __ mv(t1, JVM_CONSTANT_Double); -+ __ bne(x12, t1, notDouble); ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); + -+ // dtos -+ __ shadd(x12, x10, x11, x12, 3); -+ __ fld(f10, Address(x12, base_offset)); -+ __ push_d(f10); -+ __ j(Done); ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + -+ __ bind(notDouble); -+ __ mv(t1, (int)JVM_CONSTANT_Long); -+ __ bne(x12, t1, notLong); ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ __ beqz(x10, L1); + -+ // ltos -+ __ shadd(x10, x10, x11, x10, 3); -+ __ ld(x10, Address(x10, base_offset)); -+ __ push_l(x10); -+ __ j(Done); ++ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); + -+ __ bind(notLong); -+ condy_helper(Done); -+ __ bind(Done); ++ if (is_static) { ++ // Life is simple. Null out the object pointer. ++ __ mv(c_rarg1, zr); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
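// Illustrative sketch, not part of the patch: the slot arithmetic the code below
// performs. A one-word value (itos, ftos, atos, ...) leaves the object reference
// one expression-stack slot below the top of stack; a two-word value (ltos, dtos)
// leaves it two slots below. That is why the tos_state decoded from the
// cache-entry flags selects between at_tos_p1() and at_tos_p2().
enum SketchTosState { sk_itos, sk_ltos, sk_ftos, sk_dtos, sk_atos };  // mirrors HotSpot's TosState; names here are illustrative

static int object_slot_below_tos(SketchTosState tos) {
  const bool two_word = (tos == sk_ltos || tos == sk_dtos);  // long/double occupy two slots
  return two_word ? 2 : 1;                                   // at_tos_p2() vs at_tos_p1()
}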
++ __ lwu(c_rarg3, Address(c_rarg2, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ Label nope2, done, ok; ++ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue ++ __ sub(t0, c_rarg3, ltos); ++ __ beqz(t0, ok); ++ __ sub(t0, c_rarg3, dtos); ++ __ bnez(t0, nope2); ++ __ bind(ok); ++ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); ++ __ bind(nope2); ++ } ++ // cache entry pointer ++ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); ++ // object (tos) ++ __ mv(c_rarg3, esp); ++ // c_rarg1: object pointer set up above (NULL if static) ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } +} + -+void TemplateTable::condy_helper(Label& Done) -+{ -+ const Register obj = x10; -+ const Register rarg = x11; -+ const Register flags = x12; -+ const Register off = x13; -+ -+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); -+ -+ __ mv(rarg, (int) bytecode()); -+ __ call_VM(obj, entry, rarg); -+ -+ __ get_vm_result_2(flags, xthread); -+ -+ // VMr = obj = base address to find primitive value to push -+ // VMr2 = flags = (tos, off) using format of CPCE::_flags -+ __ mv(off, flags); -+ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); -+ __ andrw(off, off, t0); ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); + -+ __ add(off, obj, off); -+ const Address field(off, 0); // base + R---->base + offset ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x12; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register bc = x14; + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + -+ switch (bytecode()) { -+ case Bytecodes::_ldc: // fall through -+ case Bytecodes::_ldc_w: { -+ // tos in (itos, ftos, stos, btos, ctos, ztos) -+ Label notInt, notFloat, notShort, notByte, notChar, notBool; -+ __ mv(t1, itos); -+ __ bne(flags, t1, notInt); -+ // itos -+ __ lw(x10, field); -+ __ push(itos); -+ __ j(Done); ++ Label Done; ++ __ mv(x15, flags); + -+ __ bind(notInt); -+ __ mv(t1, ftos); -+ __ bne(flags, t1, notFloat); -+ // ftos -+ __ load_float(field); -+ __ push(ftos); -+ __ j(Done); ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+ __ bind(notFloat); -+ __ mv(t1, stos); -+ __ bne(flags, t1, notShort); -+ // stos -+ __ load_signed_short(x10, field); -+ __ push(stos); -+ __ j(Done); ++ Label notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; + -+ __ bind(notShort); -+ __ mv(t1, btos); -+ __ bne(flags, t1, notByte); -+ // btos -+ __ load_signed_byte(x10, field); -+ __ push(btos); -+ __ j(Done); ++ __ 
slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); + -+ __ bind(notByte); -+ __ mv(t1, ctos); -+ __ bne(flags, t1, notChar); -+ // ctos -+ __ load_unsigned_short(x10, field); -+ __ push(ctos); -+ __ j(Done); ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); + -+ __ bind(notChar); -+ __ mv(t1, ztos); -+ __ bne(flags, t1, notBool); -+ // ztos -+ __ load_signed_byte(x10, field); -+ __ push(ztos); -+ __ j(Done); ++ // Don't rewrite putstatic, only putfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } + -+ __ bind(notBool); -+ break; ++ // btos ++ { ++ __ pop(btos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); + } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); // off register as temparator register. ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+ case Bytecodes::_ldc2_w: { -+ Label notLong, notDouble; -+ __ mv(t1, ltos); -+ __ bne(flags, t1, notLong); -+ // ltos -+ __ ld(x10, field); -+ __ push(ltos); -+ __ j(Done); -+ -+ __ bind(notLong); -+ __ mv(t1, dtos); -+ __ bne(flags, t1, notDouble); -+ // dtos -+ __ load_double(field); -+ __ push(dtos); -+ __ j(Done); ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); + -+ __ bind(notDouble); -+ break; ++ // ztos ++ { ++ __ pop(ztos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); + } -+ -+ default: -+ ShouldNotReachHere(); ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } + -+ __ stop("bad ldc/condy"); -+} ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); + -+void TemplateTable::locals_index(Register reg, int offset) -+{ -+ __ lbu(reg, at_bcp(offset)); -+ __ neg(reg, reg); -+} ++ // atos ++ { ++ __ pop(atos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ // Store into the field ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+void TemplateTable::iload() { -+ iload_internal(); -+} ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); + -+void TemplateTable::nofast_iload() { -+ iload_internal(may_not_rewrite); -+} ++ // itos ++ { ++ __ pop(itos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+void TemplateTable::iload_internal(RewriteControl rc) { -+ transition(vtos, itos); -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // ctos ++ { ++ __ pop(ctos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+ // if _iload, wait to rewrite to iload2. We only want to rewrite the -+ // last two iloads in a pair. Comparing against fast_iload means that -+ // the next bytecode is neither an iload or a caload, and therefore -+ // an iload pair. -+ __ mv(t1, Bytecodes::_iload); -+ __ beq(x11, t1, done); ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); + -+ // if _fast_iload rewrite to _fast_iload2 -+ __ mv(t1, Bytecodes::_fast_iload); -+ __ mv(bc, Bytecodes::_fast_iload2); -+ __ beq(x11, t1, rewrite); ++ // stos ++ { ++ __ pop(stos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+ // if _caload rewrite to _fast_icaload -+ __ mv(t1, Bytecodes::_caload); -+ __ mv(bc, Bytecodes::_fast_icaload); -+ __ beq(x11, t1, rewrite); ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); + -+ // else rewrite to _fast_iload -+ __ mv(bc, Bytecodes::_fast_iload); ++ // ltos ++ { ++ __ pop(ltos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_iload, bc, x11, false); -+ __ bind(done); ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); + ++ // ftos ++ { ++ __ pop(ftos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); + } + -+ // do iload, get the local value into tos -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif + -+void TemplateTable::fast_iload2() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+ __ push(itos); -+ locals_index(x11, 3); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++ // dtos ++ { ++ __ pop(dtos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); ++ } ++ } + -+void TemplateTable::fast_iload() -+{ -+ transition(vtos, itos); -+ locals_index(x11); -+ __ lw(x10, iaddress(x11, x10, _masm)); -+} ++#ifdef ASSERT ++ __ j(Done); + -+void TemplateTable::lload() -+{ -+ transition(vtos, ltos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif + -+void TemplateTable::fload() -+{ -+ transition(vtos, ftos); -+ locals_index(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); -+} ++ __ bind(Done); + -+void TemplateTable::dload() -+{ -+ transition(vtos, dtos); -+ __ lbu(x11, at_bcp(1)); -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } +} + -+void TemplateTable::aload() ++void TemplateTable::putfield(int byte_no) +{ -+ transition(vtos, atos); -+ locals_index(x11); -+ __ ld(x10, iaddress(x11, x10, _masm)); -+ -+} -+ -+void TemplateTable::locals_index_wide(Register reg) { -+ __ lhu(reg, at_bcp(2)); -+ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend -+ __ neg(reg, reg); ++ putfield_or_static(byte_no, false); +} + -+void TemplateTable::wide_iload() { -+ transition(vtos, itos); -+ locals_index_wide(x11); -+ __ lw(x10, iaddress(x11, t0, _masm)); ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); +} + -+void TemplateTable::wide_lload() -+{ -+ transition(vtos, ltos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); +} + -+void TemplateTable::wide_fload() ++void TemplateTable::jvmti_post_fast_field_mod() +{ -+ transition(vtos, ftos); -+ locals_index_wide(x11); -+ __ flw(f10, faddress(x11, t0, _masm)); -+} -+ -+void TemplateTable::wide_dload() -+{ -+ transition(vtos, dtos); -+ __ lhu(x11, at_bcp(2)); -+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend -+ __ 
slli(x11, x11, LogBytesPerWord); -+ __ sub(x11, xlocals, x11); -+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); -+} ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(c_rarg3, Address(t0, offset)); ++ __ beqz(c_rarg3, L2); ++ __ pop_ptr(x9); // copy the object pointer from tos ++ __ verify_oop(x9); ++ __ push_ptr(x9); // put the object pointer back on tos ++ // Save tos values before call_VM() clobbers them. Since we have ++ // to do it for every data type, we use the saved values as the ++ // jvalue object. ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ push_d(); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(x10); break; + -+void TemplateTable::wide_aload() -+{ -+ transition(vtos, atos); -+ locals_index_wide(x11); -+ __ ld(x10, aaddress(x11, t0, _masm)); -+} ++ default: ++ ShouldNotReachHere(); ++ } ++ __ mv(c_rarg3, esp); // points to jvalue on the stack ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); ++ __ verify_oop(x9); ++ // x9: object pointer copied above ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ x9, c_rarg2, c_rarg3); + -+void TemplateTable::index_check(Register array, Register index) -+{ -+ // destroys x11, t0 -+ // check array -+ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); -+ // sign extend index for use by indexed load -+ // check index -+ const Register length = t0; -+ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); -+ if (index != x11) { -+ assert(x11 != array, "different registers"); -+ __ mv(x11, index); ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; ++ default: break; ++ } ++ __ bind(L2); + } -+ Label ok; -+ __ addw(index, index, zr); -+ __ bltu(index, length, ok); -+ __ mv(x13, array); -+ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); -+ __ jr(t0); -+ __ bind(ok); +} + -+void TemplateTable::iaload() ++void TemplateTable::fast_storefield(TosState state) +{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(x10, x11, x10, t0, 2); -+ __ 
access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+} ++ transition(state, vtos); + -+void TemplateTable::laload() -+{ -+ transition(itos, ltos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(x10, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ ByteSize base = ConstantPoolCache::base_offset(); + -+void TemplateTable::faload() -+{ -+ transition(itos, ftos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(x10, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ jvmti_post_fast_field_mod(); + -+void TemplateTable::daload() -+{ -+ transition(itos, dtos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(x10, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+void TemplateTable::aaload() -+{ -+ transition(itos, atos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); -+ do_oop_load(_masm, -+ Address(x10), -+ x10, -+ IS_ARRAY); -+} ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+void TemplateTable::baload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(x10, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ // test for volatile with x13 ++ __ lwu(x13, Address(x12, in_bytes(base + ++ ConstantPoolCacheEntry::flags_offset()))); + -+void TemplateTable::caload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(x10, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ // replace index with field offset from cache entry ++ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + -+// iload followed by caload frequent pair -+void TemplateTable::fast_icaload() -+{ -+ transition(vtos, itos); -+ // load index out of locals -+ locals_index(x12); -+ __ lw(x11, iaddress(x12, x11, _masm)); -+ __ pop_ptr(x10); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ 
add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(x10, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ // Get object from stack ++ pop_and_check_object(x12); + -+void TemplateTable::saload() -+{ -+ transition(itos, itos); -+ __ mv(x11, x10); -+ __ pop_ptr(x10); -+ // x10: array -+ // x11: index -+ index_check(x10, x11); // leaves index in x11, kills t0 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(x10, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); -+} ++ // field address ++ __ add(x11, x12, x11); ++ const Address field(x11, 0); + -+void TemplateTable::iload(int n) -+{ -+ transition(vtos, itos); -+ __ lw(x10, iaddress(n)); -+} ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_zputfield: ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+void TemplateTable::lload(int n) -+{ -+ transition(vtos, ltos); -+ __ ld(x10, laddress(n)); ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } +} + -+void TemplateTable::fload(int n) ++void TemplateTable::fast_accessfield(TosState state) +{ -+ transition(vtos, ftos); -+ __ flw(f10, faddress(n)); -+} ++ transition(atos, state); ++ // Do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
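// Illustrative analogy, not part of the patch: the ordering that the membar
// placement in the fast field accessors above is meant to provide, phrased with
// C++11 atomics. StoreStore|LoadStore before a volatile store plus
// StoreLoad|StoreStore after it correspond to a sequentially consistent store;
// LoadLoad|LoadStore after a volatile load corresponds to an acquire load.
#include <atomic>

static std::atomic<int> g_volatile_field;       // stand-in for a Java volatile field

static void volatile_put(int v) {
  g_volatile_field.store(v, std::memory_order_seq_cst);
}

static int volatile_get() {
  return g_volatile_field.load(std::memory_order_acquire);
}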
++ Label L1; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x12, Address(t0, offset)); ++ __ beqz(x12, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); ++ __ verify_oop(x10); ++ __ push_ptr(x10); // save object pointer before call_VM() clobbers it ++ __ mv(c_rarg1, x10); ++ // c_rarg1: object pointer copied above ++ // c_rarg2: cache entry pointer ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2); ++ __ pop_ptr(x10); // restore object pointer ++ __ bind(L1); ++ } + -+void TemplateTable::dload(int n) -+{ -+ transition(vtos, dtos); -+ __ fld(f10, daddress(n)); -+} ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); + -+void TemplateTable::aload(int n) -+{ -+ transition(vtos, atos); -+ __ ld(x10, iaddress(n)); -+} ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); + -+void TemplateTable::aload_0() { -+ aload_0_internal(); -+} ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); + -+void TemplateTable::nofast_aload_0() { -+ aload_0_internal(may_not_rewrite); -+} ++ // x10: object ++ __ verify_oop(x10); ++ __ null_check(x10); ++ __ add(x11, x10, x11); ++ const Address field(x11, 0); + -+void TemplateTable::aload_0_internal(RewriteControl rc) { -+ // According to bytecode histograms, the pairs: -+ // -+ // _aload_0, _fast_igetfield -+ // _aload_0, _fast_agetfield -+ // _aload_0, _fast_fgetfield -+ // -+ // occur frequently. If RewriteFrequentPairs is set, the (slow) -+ // _aload_0 bytecode checks if the next bytecode is either -+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then -+ // rewrites the current bytecode into a pair bytecode; otherwise it -+ // rewrites the current bytecode into _fast_aload_0 that doesn't do -+ // the pair check anymore. -+ // -+ // Note: If the next bytecode is _getfield, the rewrite must be -+ // delayed, otherwise we may miss an opportunity for a pair. 
-+ // -+ // Also rewrite frequent pairs -+ // aload_0, aload_1 -+ // aload_0, iload_1 -+ // These bytecodes with a small amount of code are most profitable -+ // to rewrite -+ if (RewriteFrequentPairs && rc == may_rewrite) { -+ Label rewrite, done; -+ const Register bc = x14; ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } ++} + -+ // get next bytecode -+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++void TemplateTable::fast_xaccess(TosState state) ++{ ++ transition(vtos, state); + -+ // if _getfield then wait with rewrite -+ __ mv(t1, Bytecodes::Bytecodes::_getfield); -+ __ beq(x11, t1, done); ++ // get receiver ++ __ ld(x10, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x13, 2); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); + -+ // if _igetfield then rewrite to _fast_iaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_igetfield); -+ __ mv(bc, Bytecodes::_fast_iaccess_0); -+ __ beq(x11, t1, rewrite); ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi(xbcp, xbcp, 1); ++ __ null_check(x10); ++ switch (state) { ++ case itos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case atos: ++ __ add(x10, x10, x11); ++ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case ftos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } + -+ // if _agetfield then rewrite to _fast_aaccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_agetfield); -+ __ mv(bc, Bytecodes::_fast_aaccess_0); -+ __ beq(x11, t1, rewrite); ++ { ++ Label notVolatile; ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ 
membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } + -+ // if _fgetfield then rewrite to _fast_faccess_0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(t1, Bytecodes::_fast_fgetfield); -+ __ mv(bc, Bytecodes::_fast_faccess_0); -+ __ beq(x11, t1, rewrite); ++ __ sub(xbcp, xbcp, 1); ++} + -+ // else rewrite to _fast_aload0 -+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); -+ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); ++//----------------------------------------------------------------------------- ++// Calls + -+ // rewrite -+ // bc: new bytecode -+ __ bind(rewrite); -+ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == x13, ""); ++ assert(recv == noreg || recv == x12, ""); + -+ __ bind(done); ++ // setup registers & access constant pool cache ++ if (recv == noreg) { ++ recv = x12; ++ } ++ if (flags == noreg) { ++ flags = x13; + } ++ assert_different_registers(method, index, recv, flags); + -+ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). -+ aload(0); -+} ++ // save 'interpreter return address' ++ __ save_bcp(); + -+void TemplateTable::istore() -+{ -+ transition(itos, vtos); -+ locals_index(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + -+void TemplateTable::lstore() -+{ -+ transition(ltos, vtos); -+ locals_index(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // maybe push appendix to arguments (just before return address) ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); ++ __ beqz(t0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ __ push_reg(x9); ++ __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, x9); ++ __ pop_reg(x9); ++ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
++ __ bind(L_no_push); ++ } + -+void TemplateTable::fstore() { -+ transition(ftos, vtos); -+ locals_index(x11); -+ __ fsw(f10, iaddress(x11, t0, _masm)); -+} ++ // load receiver if needed (note: no return address pushed yet) ++ if (load_receiver) { ++ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 ++ __ shadd(t0, recv, esp, t0, 3); ++ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); ++ __ verify_oop(recv); ++ } + -+void TemplateTable::dstore() { -+ transition(dtos, vtos); -+ locals_index(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); ++ // compute return type ++ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 ++ ++ // load return address ++ { ++ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ mv(t0, table_addr); ++ __ shadd(t0, t1, t0, t1, 3); ++ __ ld(ra, Address(t0, 0)); ++ } +} + -+void TemplateTable::astore() ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) +{ -+ transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ // Uses temporary registers x10, x13 ++ assert_different_registers(index, recv, x10, x13); ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notFinal); + -+void TemplateTable::wide_istore() { -+ transition(vtos, vtos); -+ __ pop_i(); -+ locals_index_wide(x11); -+ __ sw(x10, iaddress(x11, t0, _masm)); -+} ++ const Register method = index; // method must be xmethod ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + -+void TemplateTable::wide_lstore() { -+ transition(vtos, vtos); -+ __ pop_l(); -+ locals_index_wide(x11); -+ __ sd(x10, laddress(x11, t0, _masm)); -+} ++ // do the call - the index is actually the method to call ++ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + -+void TemplateTable::wide_fstore() { -+ transition(vtos, vtos); -+ __ pop_f(); -+ locals_index_wide(x11); -+ __ fsw(f10, faddress(x11, t0, _masm)); ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, method, x14, true); ++ ++ __ jump_from_interpreted(method); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x10, recv); ++ ++ // profile this call ++ __ profile_virtual_call(x10, xlocals, x13); ++ ++ // get target Method & entry point ++ __ lookup_virtual_method(x10, index, method); ++ __ profile_arguments_type(x13, method, x14, true); ++ __ jump_from_interpreted(method); +} + -+void TemplateTable::wide_dstore() { ++void TemplateTable::invokevirtual(int byte_no) ++{ + transition(vtos, vtos); -+ __ pop_d(); -+ locals_index_wide(x11); -+ __ fsd(f10, daddress(x11, t0, _masm)); ++ assert(byte_no == f2_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, xmethod, noreg, x12, x13); ++ ++ // xmethod: index (actually a Method*) ++ // x12: receiver ++ // x13: flags ++ ++ invokevirtual_helper(xmethod, x12, x13); +} + -+void TemplateTable::wide_astore() { ++void TemplateTable::invokespecial(int byte_no) ++{ + transition(vtos, vtos); -+ __ pop_ptr(x10); -+ locals_index_wide(x11); -+ __ sd(x10, aaddress(x11, t0, _masm)); -+} ++ assert(byte_no == f1_byte, "use this argument"); + -+void TemplateTable::iastore() { -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); ++ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* ++ x12); // get receiver also for null check ++ __ verify_oop(x12); ++ __ null_check(x12); ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, xbcp, false); ++ __ jump_from_interpreted(xmethod); +} + -+void TemplateTable::lastore() { -+ transition(ltos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); -+} ++void TemplateTable::invokestatic(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this arugment"); + -+void TemplateTable::fastore() { -+ transition(ftos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); ++ prepare_invoke(byte_no, xmethod); // get f1 Method* ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, false); ++ __ jump_from_interpreted(xmethod); +} + -+void TemplateTable::dastore() { -+ transition(dtos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // f10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); ++void TemplateTable::fast_invokevfinal(int byte_no) ++{ 
++ __ call_Unimplemented(); +} + -+void TemplateTable::aastore() { -+ Label is_null, ok_is_subtype, done; ++void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); -+ // stack: ..., array, index, value -+ __ ld(x10, at_tos()); // value -+ __ ld(x12, at_tos_p1()); // index -+ __ ld(x13, at_tos_p2()); // array ++ assert(byte_no == f1_byte, "use this argument"); + -+ index_check(x13, x12); // kills x11 -+ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); ++ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* ++ x12, x13); // recv, flags + -+ Address element_address(x14, 0); ++ // x10: interface klass (from f1) ++ // xmethod: method (from f2) ++ // x12: receiver ++ // x13: flags + -+ // do array store check - check for NULL value first -+ __ beqz(x10, is_null); ++ // First check for Object case, then private interface method, ++ // then regular interface method. + -+ // Move subklass into x11 -+ __ load_klass(x11, x10); -+ // Move superklass into x10 -+ __ load_klass(x10, x13); -+ __ ld(x10, Address(x10, -+ ObjArrayKlass::element_klass_offset())); -+ // Compress array + index * oopSize + 12 into a single register. Frees x12. ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details ++ Label notObjectMethod; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); ++ __ beqz(t0, notObjectMethod); + -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x11. -+ __ gen_subtype_check(x11, ok_is_subtype); //todo ++ invokevirtual_helper(xmethod, x12, x13); ++ __ bind(notObjectMethod); + -+ // Come here on failure -+ // object is at TOS -+ __ j(Interpreter::_throw_ArrayStoreException_entry); ++ Label no_such_interface; + -+ // Come here on success -+ __ bind(ok_is_subtype); ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notVFinal); + -+ // Get the value we will store -+ __ ld(x10, at_tos()); -+ // Now store using the appropriate barrier -+ do_oop_store(_masm, element_address, x10, IS_ARRAY); -+ __ j(done); ++ // Check receiver klass into x13 - also a null check ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] -+ __ bind(is_null); -+ __ profile_null_seen(x12); ++ Label subtype; ++ __ check_klass_subtype(x13, x10, x14, subtype); ++ // If we get here the typecheck failed ++ __ j(no_such_interface); ++ __ bind(subtype); + -+ // Store a NULL -+ do_oop_store(_masm, element_address, noreg, IS_ARRAY); ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, true); ++ __ jump_from_interpreted(xmethod); + -+ // Pop stack arguments -+ __ bind(done); -+ __ add(esp, esp, 3 * Interpreter::stackElementSize); ++ __ bind(notVFinal); + -+} ++ // Get receiver klass into x13 - also a null check ++ __ restore_locals(); ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); + -+void TemplateTable::bastore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 ++ Label no_such_method; + -+ // Need to check whether array is boolean or byte -+ // since both types share the bastore bytecode. 
-+ __ load_klass(x12, x13); -+ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); -+ Label L_skip; -+ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); -+ __ beqz(t0, L_skip); -+ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 -+ __ bind(L_skip); ++ // Preserve method for the throw_AbstractMethodErrorVerbose. ++ __ mv(x28, xmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in x10. Subklass in x13. Blows t1, x30 ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ x13, x10, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ t1, x30, ++ no_such_interface, ++ /*return_method=*/false); + -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ // profile this call ++ __ profile_virtual_call(x13, x30, x9); + -+ __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); -+} ++ // Get declaring interface class from method, and itable index ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); ++ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); ++ __ subw(xmethod, xmethod, Method::itable_index_max); ++ __ negw(xmethod, xmethod); + -+void TemplateTable::castore() -+{ -+ transition(itos, vtos); -+ __ pop_i(x11); -+ __ pop_ptr(x13); -+ // x10: value -+ // x11: index -+ // x13: array -+ index_check(x13, x11); // prefer index in x11 -+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); -+} ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose ++ __ mv(xlocals, x13); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ xlocals, x10, xmethod, ++ // outputs: method, scan temp. reg ++ xmethod, x30, ++ no_such_interface); + -+void TemplateTable::sastore() -+{ -+ castore(); -+} ++ // xmethod: Method to call ++ // x12: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beqz(xmethod, no_such_method); + -+void TemplateTable::istore(int n) -+{ -+ transition(itos, vtos); -+ __ sd(x10, iaddress(n)); -+} ++ __ profile_arguments_type(x13, xmethod, x30, true); + -+void TemplateTable::lstore(int n) -+{ -+ transition(ltos, vtos); -+ __ sd(x10, laddress(n)); -+} ++ // do the call ++ // x12: receiver ++ // xmethod: Method ++ __ jump_from_interpreted(xmethod); ++ __ should_not_reach_here(); + -+void TemplateTable::fstore(int n) -+{ -+ transition(ftos, vtos); -+ __ fsw(f10, faddress(n)); -+} ++ // exception handling code follows ... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! + -+void TemplateTable::dstore(int n) -+{ -+ transition(dtos, vtos); -+ __ fsd(f10, daddress(n)); -+} ++ __ bind(no_such_method); ++ // throw exception ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. 
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28);
++ // the call_VM checks for exception, so we should never return here.
++ __ should_not_reach_here();
+
-+void TemplateTable::astore(int n)
-+{
-+ transition(vtos, vtos);
-+ __ pop_ptr(x10);
-+ __ sd(x10, iaddress(n));
++ __ bind(no_such_interface);
++ // throw exception
++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
++ // Pass arguments for generating a verbose error message.
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10);
++ // the call_VM checks for exception, so we should never return here.
++ __ should_not_reach_here();
++ return;
+}
+
-+void TemplateTable::pop()
-+{
++void TemplateTable::invokehandle(int byte_no) {
+ transition(vtos, vtos);
-+ __ addi(esp, esp, Interpreter::stackElementSize);
-+}
++ assert(byte_no == f1_byte, "use this argument");
+
-+void TemplateTable::pop2()
-+{
-+ transition(vtos, vtos);
-+ __ addi(esp, esp, 2 * Interpreter::stackElementSize);
-+}
++ prepare_invoke(byte_no, xmethod, x10, x12);
++ __ verify_method_ptr(x12);
++ __ verify_oop(x12);
++ __ null_check(x12);
+
-+void TemplateTable::dup()
-+{
-+ transition(vtos, vtos);
-+ __ ld(x10, Address(esp, 0));
-+ __ push_reg(x10);
-+ // stack: ..., a, a
-+}
++ // FIXME: profile the LambdaForm also
+
-+void TemplateTable::dup_x1()
-+{
-+ transition(vtos, vtos);
-+ // stack: ..., a, b
-+ __ ld(x10, at_tos()); // load b
-+ __ ld(x12, at_tos_p1()); // load a
-+ __ sd(x10, at_tos_p1()); // store b
-+ __ sd(x12, at_tos()); // store a
-+ __ push_reg(x10); // push b
-+ // stack: ..., b, a, b
-+}
++ // x30 is safe to use here as a temp reg because it is about to
++ // be clobbered by jump_from_interpreted().
++ __ profile_final_call(x30); ++ __ profile_arguments_type(x30, xmethod, x14, true); + -+void TemplateTable::dup_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x10, at_tos()); // load c -+ __ ld(x12, at_tos_p2()); // load a -+ __ sd(x10, at_tos_p2()); // store c in a -+ __ push_reg(x10); // push c -+ // stack: ..., c, b, c, c -+ __ ld(x10, at_tos_p2()); // load b -+ __ sd(x12, at_tos_p2()); // store a in b -+ // stack: ..., c, a, c, c -+ __ sd(x10, at_tos_p1()); // store b in c -+ // stack: ..., c, a, b, c ++ __ jump_from_interpreted(xmethod); +} + -+void TemplateTable::dup2() -+{ ++void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x10, at_tos_p1()); // load a -+ __ push_reg(x10); // push a -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ // stack: ..., a, b, a, b -+} ++ assert(byte_no == f1_byte, "use this argument"); + -+void TemplateTable::dup2_x1() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c -+ __ ld(x12, at_tos()); // load c -+ __ ld(x10, at_tos_p1()); // load b -+ __ push_reg(x10); // push b -+ __ push_reg(x12); // push c -+ // stack: ..., a, b, c, b, c -+ __ sd(x12, at_tos_p3()); // store c in b -+ // stack: ..., a, c, c, b, c -+ __ ld(x12, at_tos_p4()); // load a -+ __ sd(x12, at_tos_p2()); // store a in 2nd c -+ // stack: ..., a, c, a, b, c -+ __ sd(x10, at_tos_p4()); // store b in a -+ // stack: ..., b, c, a, b, c -+} ++ prepare_invoke(byte_no, xmethod, x10); + -+void TemplateTable::dup2_x2() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b, c, d -+ __ ld(x12, at_tos()); // load d -+ __ ld(x10, at_tos_p1()); // load c -+ __ push_reg(x10); // push c -+ __ push_reg(x12); // push d -+ // stack: ..., a, b, c, d, c, d -+ __ ld(x10, at_tos_p4()); // load b -+ __ sd(x10, at_tos_p2()); // store b in d -+ __ sd(x12, at_tos_p4()); // store d in b -+ // stack: ..., a, d, c, b, c, d -+ __ ld(x12, at_tos_p5()); // load a -+ __ ld(x10, at_tos_p3()); // load c -+ __ sd(x12, at_tos_p3()); // store a in c -+ __ sd(x10, at_tos_p5()); // store c in a -+ // stack: ..., c, d, a, b, c, d -+} ++ // x10: CallSite object (from cpool->resolved_references[]) ++ // xmethod: MH.linkToCallSite method (from f2) + -+void TemplateTable::swap() -+{ -+ transition(vtos, vtos); -+ // stack: ..., a, b -+ __ ld(x12, at_tos_p1()); // load a -+ __ ld(x10, at_tos()); // load b -+ __ sd(x12, at_tos()); // store a in b -+ __ sd(x10, at_tos_p1()); // store b in a -+ // stack: ..., b, a -+} ++ // Note: x10_callsite is already pushed by prepare_invoke + -+void TemplateTable::iop2(Operation op) -+{ -+ transition(itos, itos); -+ // x10 <== x11 op x10 -+ __ pop_i(x11); -+ switch (op) { -+ case add : __ addw(x10, x11, x10); break; -+ case sub : __ subw(x10, x11, x10); break; -+ case mul : __ mulw(x10, x11, x10); break; -+ case _and : __ andrw(x10, x11, x10); break; -+ case _or : __ orrw(x10, x11, x10); break; -+ case _xor : __ xorrw(x10, x11, x10); break; -+ case shl : __ sllw(x10, x11, x10); break; -+ case shr : __ sraw(x10, x11, x10); break; -+ case ushr : __ srlw(x10, x11, x10); break; -+ default : ShouldNotReachHere(); -+ } ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(xbcp); ++ __ profile_arguments_type(x13, xmethod, x30, false); ++ ++ __ verify_oop(x10); ++ ++ __ jump_from_interpreted(xmethod); +} + -+void TemplateTable::lop2(Operation op) -+{ -+ transition(ltos, ltos); -+ // x10 <== x11 op x10 -+ __ pop_l(x11); -+ switch 
(op) {
-+ case add : __ add(x10, x11, x10); break;
-+ case sub : __ sub(x10, x11, x10); break;
-+ case mul : __ mul(x10, x11, x10); break;
-+ case _and : __ andr(x10, x11, x10); break;
-+ case _or : __ orr(x10, x11, x10); break;
-+ case _xor : __ xorr(x10, x11, x10); break;
-+ default : ShouldNotReachHere();
++//-----------------------------------------------------------------------------
++// Allocation
++
++void TemplateTable::_new() {
++ transition(vtos, atos);
++
++ __ get_unsigned_2_byte_index_at_bcp(x13, 1);
++ Label slow_case;
++ Label done;
++ Label initialize_header;
++ Label initialize_object; // including clearing the fields
++
++ __ get_cpool_and_tags(x14, x10);
++ // Make sure the class we're about to instantiate has been resolved.
++ // This is done before loading InstanceKlass to be consistent with the order
++ // how Constant Pool is updated (see ConstantPool::klass_at_put)
++ const int tags_offset = Array<u1>::base_offset_in_bytes();
++ __ add(t0, x10, x13);
++ __ la(t0, Address(t0, tags_offset));
++ __ membar(MacroAssembler::AnyAny);
++ __ lbu(t0, t0);
++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
++ __ sub(t1, t0, (u1)JVM_CONSTANT_Class);
++ __ bnez(t1, slow_case);
++
++ // get InstanceKlass
++ __ load_resolved_klass_at_offset(x14, x13, x14, t0);
++
++ // make sure klass is initialized & doesn't have finalizer
++ // make sure klass is fully initialized
++ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset()));
++ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized);
++ __ bnez(t1, slow_case);
++
++ // get instance_size in InstanceKlass (scaled to a count of bytes)
++ __ lwu(x13, Address(x14, Klass::layout_helper_offset()));
++ // test to see if it has a finalizer or is malformed in some way
++ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit);
++ __ bnez(t0, slow_case);
++
++ // Allocate the instance:
++ // If TLAB is enabled:
++ // Try to allocate in the TLAB.
++ // If fails, go to the slow path.
++ // Else If inline contiguous allocations are enabled:
++ // Try to allocate in eden.
++ // If fails due to heap end, go to slow path
++ //
++ // If TLAB is enabled OR inline contiguous is enabled:
++ // Initialize the allocation.
++ // Exit.
++ // Go to slow path.
++ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc();
++
++ if (UseTLAB) {
++ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case);
++
++ if (ZeroTLAB) {
++ // the fields have been already cleared
++ __ j(initialize_header);
++ } else {
++ // initialize both the header and fields
++ __ j(initialize_object);
++ }
++ } else {
++ // Allocation in the shared Eden, if allowed.
++ //
++ // x13: instance size in bytes
++ if (allow_shared_alloc) {
++ __ eden_allocate(x10, x13, 0, x28, slow_case);
++ }
+ }
+
++ // If UseTLAB or allow_shared_alloc are true, the object is created above and
++ // needs to be initialized. Otherwise, skip and go to the slow path.
++ if (UseTLAB || allow_shared_alloc) {
++ // The object is initialized before the header. If the object size is
++ // zero, go directly to the header initialization.
++ __ bind(initialize_object);
++ __ sub(x13, x13, sizeof(oopDesc));
++ __ beqz(x13, initialize_header);
+
-+void TemplateTable::irem()
-+{
-+ transition(itos, itos);
-+ // explicitly check for div0
-+ Label no_div0;
-+ __ bnez(x10, no_div0);
-+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-+ __ jr(t0);
-+ __ bind(no_div0);
-+ __ pop_i(x11);
-+ // x10 <== x11 irem x10
-+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true);
-+}
++ // Initialize object fields
++ {
++ __ add(x12, x10, sizeof(oopDesc));
++ Label loop;
++ __ bind(loop);
++ __ sd(zr, Address(x12));
++ __ add(x12, x12, BytesPerLong);
++ __ sub(x13, x13, BytesPerLong);
++ __ bnez(x13, loop);
++ }
+
-+void TemplateTable::lmul()
-+{
-+ transition(ltos, ltos);
-+ __ pop_l(x11);
-+ __ mul(x10, x10, x11);
-+}
++ // initialize object header only.
++ __ bind(initialize_header);
++ if (UseBiasedLocking) {
++ __ ld(t0, Address(x14, Klass::prototype_header_offset()));
++ } else {
++ __ mv(t0, (intptr_t)markOopDesc::prototype());
++ }
++ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes()));
++ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops
++ __ store_klass(x10, x14); // store klass last
+
-+void TemplateTable::ldiv()
-+{
-+ transition(ltos, ltos);
-+ // explicitly check for div0
-+ Label no_div0;
-+ __ bnez(x10, no_div0);
-+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-+ __ jr(t0);
-+ __ bind(no_div0);
-+ __ pop_l(x11);
-+ // x10 <== x11 ldiv x10
-+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false);
-+}
++ {
++ SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
++ // Trigger dtrace event for fastpath
++ __ push(atos); // save the return value
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10);
++ __ pop(atos); // restore the return value
++ }
++ __ j(done);
++ }
+
-+void TemplateTable::lrem()
-+{
-+ transition(ltos, ltos);
-+ // explicitly check for div0
-+ Label no_div0;
-+ __ bnez(x10, no_div0);
-+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-+ __ jr(t0);
-+ __ bind(no_div0);
-+ __ pop_l(x11);
-+ // x10 <== x11 lrem x10
-+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true);
-+}
++ // slow case
++ __ bind(slow_case);
++ __ get_constant_pool(c_rarg1);
++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
++ __ verify_oop(x10);
+
-+void TemplateTable::lshl()
-+{
-+ transition(itos, ltos);
-+ // shift count is in x10
-+ __ pop_l(x11);
-+ __ sll(x10, x11, x10);
++ // continue
++ __ bind(done);
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(MacroAssembler::StoreStore);
+}
+
-+void TemplateTable::lshr()
-+{
-+ transition(itos, ltos);
-+ // shift count is in x10
-+ __ pop_l(x11);
-+ __ sra(x10, x11, x10);
++void TemplateTable::newarray() {
++ transition(itos, atos);
++ __ load_unsigned_byte(c_rarg1, at_bcp(1));
++ __ mv(c_rarg2, x10);
++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
++ c_rarg1, c_rarg2);
++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
++ __ membar(MacroAssembler::StoreStore); +} + -+void TemplateTable::lushr() -+{ -+ transition(itos, ltos); -+ // shift count is in x10 -+ __ pop_l(x11); -+ __ srl(x10, x11, x10); ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ __ get_constant_pool(c_rarg1); ++ __ mv(c_rarg3, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), ++ c_rarg1, c_rarg2, c_rarg3); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); +} + -+void TemplateTable::fop2(Operation op) -+{ -+ transition(ftos, ftos); -+ switch (op) { -+ case add: -+ __ pop_f(f11); -+ __ fadd_s(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_f(f11); -+ __ fsub_s(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_f(f11); -+ __ fmul_s(f10, f11, f10); -+ break; -+ case div: -+ __ pop_f(f11); -+ __ fdiv_s(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_s(f11, f10); -+ __ pop_f(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); ++ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); +} + -+void TemplateTable::dop2(Operation op) ++void TemplateTable::checkcast() +{ -+ transition(dtos, dtos); -+ switch (op) { -+ case add: -+ __ pop_d(f11); -+ __ fadd_d(f10, f11, f10); -+ break; -+ case sub: -+ __ pop_d(f11); -+ __ fsub_d(f10, f11, f10); -+ break; -+ case mul: -+ __ pop_d(f11); -+ __ fmul_d(f10, f11, f10); -+ break; -+ case div: -+ __ pop_d(f11); -+ __ fdiv_d(f10, f11, f10); -+ break; -+ case rem: -+ __ fmv_d(f11, f10); -+ __ pop_d(f10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); -+ break; -+ default: -+ ShouldNotReachHere(); ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); ++ ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ j(resolved); ++ ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass ++ ++ __ bind(resolved); ++ __ load_klass(x9, x13); ++ ++ // Generate subtype check. Blows x12, x15. Object in x13. ++ // Superklass in x10. Subklass in x9. ++ __ gen_subtype_check(x9, ok_is_subtype); ++ ++ // Come here on failure ++ __ push_reg(x13); ++ // object is at TOS ++ __ j(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, x13); // Restore object in x13 ++ ++ // Collect counts on whether this test sees NULLs a lot or not. 
++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' + } ++ __ bind(done); +} + -+void TemplateTable::ineg() -+{ -+ transition(itos, itos); -+ __ negw(x10, x10); -+} ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); + -+void TemplateTable::lneg() -+{ -+ transition(ltos, ltos); -+ __ neg(x10, x10); -+} ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); + -+void TemplateTable::fneg() -+{ -+ transition(ftos, ftos); -+ __ fneg_s(f10, f10); -+} ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ verify_oop(x13); ++ __ load_klass(x13, x13); ++ __ j(resolved); + -+void TemplateTable::dneg() -+{ -+ transition(dtos, dtos); -+ __ fneg_d(f10, f10); ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ load_klass(x13, x10); ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); ++ ++ __ bind(resolved); ++ ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x13. ++ __ gen_subtype_check(x13, ok_is_subtype); ++ ++ // Come here on failure ++ __ mv(x10, zr); ++ __ j(done); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass ++ // x10 = 1: obj != NULL and obj is an instanceof the specified klass +} + -+void TemplateTable::iinc() -+{ ++//----------------------------------------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. 
++ + transition(vtos, vtos); -+ __ load_signed_byte(x11, at_bcp(2)); // get constant -+ locals_index(x12); -+ __ ld(x10, iaddress(x12, x10, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); ++ ++ // get the unpatched byte code ++ __ get_method(c_rarg1); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ c_rarg1, xbcp); ++ __ mv(x9, x10); ++ ++ // post the breakpoint event ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ++ xmethod, xbcp); ++ ++ // complete the execution of original bytecode ++ __ mv(t0, x9); ++ __ dispatch_only_normal(vtos); +} + -+void TemplateTable::wide_iinc() -+{ -+ transition(vtos, vtos); -+ __ lwu(x11, at_bcp(2)); // get constant and index -+ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend -+ __ zero_extend(x12, x11, 16); -+ __ neg(x12, x12); -+ __ slli(x11, x11, 32); -+ __ srai(x11, x11, 48); -+ __ ld(x10, iaddress(x12, t0, _masm)); -+ __ addw(x10, x10, x11); -+ __ sd(x10, iaddress(x12, t0, _masm)); ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(x10); ++ __ j(Interpreter::throw_exception_entry()); +} + -+void TemplateTable::convert() ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- esp = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... 
++// [saved fp ] <--- fp ++void TemplateTable::monitorenter() +{ -+ // Checking -+#ifdef ASSERT -+ { -+ TosState tos_in = ilgl; -+ TosState tos_out = ilgl; -+ switch (bytecode()) { -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_in = itos; break; -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_l2d: tos_in = ltos; break; -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_f2d: tos_in = ftos; break; -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_d2l: // fall through -+ case Bytecodes::_d2f: tos_in = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ switch (bytecode()) { -+ case Bytecodes::_l2i: // fall through -+ case Bytecodes::_f2i: // fall through -+ case Bytecodes::_d2i: // fall through -+ case Bytecodes::_i2b: // fall through -+ case Bytecodes::_i2c: // fall through -+ case Bytecodes::_i2s: tos_out = itos; break; -+ case Bytecodes::_i2l: // fall through -+ case Bytecodes::_f2l: // fall through -+ case Bytecodes::_d2l: tos_out = ltos; break; -+ case Bytecodes::_i2f: // fall through -+ case Bytecodes::_l2f: // fall through -+ case Bytecodes::_d2f: tos_out = ftos; break; -+ case Bytecodes::_i2d: // fall through -+ case Bytecodes::_l2d: // fall through -+ case Bytecodes::_f2d: tos_out = dtos; break; -+ default : ShouldNotReachHere(); -+ } -+ transition(tos_in, tos_out); -+ } -+#endif // ASSERT -+ -+ // Conversion -+ switch (bytecode()) { -+ case Bytecodes::_i2l: -+ __ sign_extend(x10, x10, 32); -+ break; -+ case Bytecodes::_i2f: -+ __ fcvt_s_w(f10, x10); -+ break; -+ case Bytecodes::_i2d: -+ __ fcvt_d_w(f10, x10); -+ break; -+ case Bytecodes::_i2b: -+ __ sign_extend(x10, x10, 8); -+ break; -+ case Bytecodes::_i2c: -+ __ zero_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_i2s: -+ __ sign_extend(x10, x10, 16); -+ break; -+ case Bytecodes::_l2i: -+ __ addw(x10, x10, zr); -+ break; -+ case Bytecodes::_l2f: -+ __ fcvt_s_l(f10, x10); -+ break; -+ case Bytecodes::_l2d: -+ __ fcvt_d_l(f10, x10); -+ break; -+ case Bytecodes::_f2i: -+ __ fcvt_w_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2l: -+ __ fcvt_l_s_safe(x10, f10); -+ break; -+ case Bytecodes::_f2d: -+ __ fcvt_d_s(f10, f10); -+ break; -+ case Bytecodes::_d2i: -+ __ fcvt_w_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2l: -+ __ fcvt_l_d_safe(x10, f10); -+ break; -+ case Bytecodes::_d2f: -+ __ fcvt_s_d(f10, f10); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void TemplateTable::lcmp() -+{ -+ transition(ltos, itos); -+ __ pop_l(x11); -+ __ cmp_l2i(t0, x11, x10); -+ __ mv(x10, t0); -+} -+ -+void TemplateTable::float_cmp(bool is_float, int unordered_result) -+{ -+ // For instruction feq, flt and fle, the result is 0 if either operand is NaN -+ if (is_float) { -+ __ pop_f(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. -+ // f11 primary, f10 secondary -+ __ float_compare(x10, f11, f10, unordered_result); -+ } else { -+ __ pop_d(f11); -+ // if unordered_result < 0: -+ // we want -1 for unordered or less than, 0 for equal and 1 for -+ // greater than. -+ // else: -+ // we want -1 for less than, 0 for equal and 1 for unordered or -+ // greater than. 
-+ // f11 primary, f10 secondary -+ __ double_compare(x10, f11, f10, unordered_result); -+ } -+} -+ -+void TemplateTable::branch(bool is_jsr, bool is_wide) -+{ -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++ transition(atos, vtos); + -+ __ profile_taken_branch(x10, x11); -+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset(); -+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset(); ++ // check for NULL object ++ __ null_check(x10); + -+ // load branch displacement -+ if (!is_wide) { -+ __ lhu(x12, at_bcp(1)); -+ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend -+ } else { -+ __ lwu(x12, at_bcp(1)); -+ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend -+ } ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // Handle all the JSR stuff here, then exit. -+ // It's much shorter and cleaner than intermingling with the non-JSR -+ // normal-branch stuff occurring below. ++ Label allocated; + -+ if (is_jsr) { -+ // compute return address as bci -+ __ ld(t1, Address(xmethod, Method::const_offset())); -+ __ add(t1, t1, -+ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); -+ __ sub(x11, xbcp, t1); -+ __ push_i(x11); -+ // Adjust the bcp by the 16-bit displacement in x12 -+ __ add(xbcp, xbcp, x12); -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); -+ // load the next target bytecode into t0, it is the argument of dispatch_only -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ return; -+ } ++ // initialize entry pointer ++ __ mv(c_rarg1, zr); // points to free slot or NULL + -+ // Normal (non-jsr) branch handling ++ // find a free slot in the monitor block (result in c_rarg1) ++ { ++ Label entry, loop, exit, notUsed; ++ __ ld(c_rarg3, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom + -+ // Adjust the bcp by the displacement in x12 -+ __ add(xbcp, xbcp, x12); ++ __ j(entry); + -+ assert(UseLoopCounter || !UseOnStackReplacement, -+ "on-stack-replacement requires loop counters"); -+ Label backedge_counter_overflow; -+ Label dispatch; -+ if (UseLoopCounter) { -+ // increment backedge counter for backward branches -+ // x10: MDO -+ // x11: MDO bumped taken-count -+ // x12: target offset -+ __ bgtz(x12, dispatch); // count only if backward branch ++ __ bind(loop); ++ // check if current entry is used ++ // if not used then remember entry in c_rarg1 ++ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t0, notUsed); ++ __ mv(c_rarg1, c_rarg3); ++ __ bind(notUsed); ++ // check if current entry is for same object ++ // if same object then stop searching ++ __ beq(x10, t0, exit); ++ // otherwise advance to next entry ++ __ add(c_rarg3, c_rarg3, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg3, c_rarg2, loop); ++ __ bind(exit); ++ } + -+ // check if MethodCounters exists 
-+ Label has_counters; -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ bnez(t0, has_counters); -+ __ push_reg(x10); -+ __ push_reg(x11); -+ __ push_reg(x12); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::build_method_counters), xmethod); -+ __ pop_reg(x12); -+ __ pop_reg(x11); -+ __ pop_reg(x10); -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory -+ __ bind(has_counters); ++ __ bnez(c_rarg1, allocated); // check if a slot has been found and ++ // if found, continue with that on + -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ bind(dispatch); -+ } ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // esp: old expression stack top ++ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom ++ __ sub(esp, esp, entry_size); // move expression stack top ++ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom ++ __ mv(c_rarg3, esp); // set start value for copy loop ++ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom ++ __ sub(sp, sp, entry_size); // make room for the monitor + -+ // Pre-load the next target bytecode into t0 -+ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ __ j(entry); ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack ++ // word from old location ++ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location ++ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word ++ __ bind(entry); ++ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom ++ // then copy next word ++ } + -+ // continue with the bytecode @ target -+ // t0: target bytecode -+ // xbcp: target bcp -+ __ dispatch_only(vtos, /*generate_poll*/true); ++ // call run-time routine ++ // c_rarg1: points to monitor entry ++ __ bind(allocated); + -+ if (UseLoopCounter && UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ // Increment bcp to point to the next bytecode, so exception ++ // handling for async. exceptions work correctly. 
++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi(xbcp, xbcp, 1); + -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); ++ // store object ++ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ lock_object(c_rarg1); + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); + -+ __ mv(x9, x10); // save the nmethod ++ // The bcp has already been incremented. Just need to dispatch to ++ // next instruction. ++ __ dispatch_next(vtos); ++} + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++void TemplateTable::monitorexit() ++{ ++ transition(atos, vtos); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); ++ // check for NULL object ++ __ null_check(x10); + -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } -+} ++ Label found; + -+void TemplateTable::if_0cmp(Condition cc) -+{ -+ transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg1, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ j(entry); + -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bnez(x10, not_taken); -+ break; -+ case not_equal: -+ __ beqz(x10, not_taken); -+ break; -+ case less: -+ __ bgez(x10, not_taken); -+ break; -+ case less_equal: -+ __ bgtz(x10, not_taken); -+ break; -+ case greater: -+ __ blez(x10, not_taken); -+ break; -+ case greater_equal: -+ __ bltz(x10, not_taken); -+ break; -+ default: -+ break; ++ __ bind(loop); ++ // check if current entry is for same object ++ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ beq(x10, t0, found); ++ // otherwise advance to next entry ++ __ add(c_rarg1, c_rarg1, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg1, c_rarg2, loop); + } + -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); -+} -+ -+void TemplateTable::if_icmp(Condition cc) -+{ -+ 
transition(itos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_i(x11); -+ __ addw(x10, x10, zr); -+ switch (cc) { -+ case equal: -+ __ bne(x11, x10, not_taken); -+ break; -+ case not_equal: -+ __ beq(x11, x10, not_taken); -+ break; -+ case less: -+ __ bge(x11, x10, not_taken); -+ break; -+ case less_equal: -+ __ bgt(x11, x10, not_taken); -+ break; -+ case greater: -+ __ ble(x11, x10, not_taken); -+ break; -+ case greater_equal: -+ __ blt(x11, x10, not_taken); -+ break; -+ default: -+ break; -+ } ++ // error handling. Unlocking was not block-structured ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); + -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++ // call run-time routine ++ __ bind(found); ++ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) ++ __ unlock_object(c_rarg1); ++ __ pop_ptr(x10); // discard object +} + -+void TemplateTable::if_nullcmp(Condition cc) ++// Wide instructions ++void TemplateTable::wide() +{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ if (cc == equal) { -+ __ bnez(x10, not_taken); -+ } else { -+ __ beqz(x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++ __ load_unsigned_byte(x9, at_bcp(1)); ++ __ mv(t0, (address)Interpreter::_wentry_point); ++ __ shadd(t0, x9, t0, t1, 3); ++ __ ld(t0, Address(t0)); ++ __ jr(t0); +} + -+void TemplateTable::if_acmp(Condition cc) -+{ -+ transition(atos, vtos); -+ // assume branch is more often taken than not (loops use backward branches) -+ Label not_taken; -+ __ pop_ptr(x11); -+ -+ if (cc == equal) { -+ __ bne(x11, x10, not_taken); -+ } else if (cc == not_equal) { -+ __ beq(x11, x10, not_taken); -+ } -+ branch(false, false); -+ __ bind(not_taken); -+ __ profile_not_taken_branch(x10); ++// Multi arrays ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); ++ __ sub(c_rarg1, c_rarg1, wordSize); ++ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), ++ c_rarg1); ++ __ load_unsigned_byte(x11, at_bcp(3)); ++ __ shadd(esp, x11, esp, t0, 3); +} +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +new file mode 100644 +index 0000000000..fcc86108d2 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void TemplateTable::ret() { -+ transition(vtos, vtos); -+ // We might be moving to a safepoint. The thread which calls -+ // Interpreter::notice_safepoints() will effectively flush its cache -+ // when it makes a system call, but we need to do something to -+ // ensure that we see the changed dispatch table. -+ __ membar(MacroAssembler::LoadLoad); ++#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP + -+ locals_index(x11); -+ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); -+} ++static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++static void invokevirtual_helper(Register index, Register recv, ++ Register flags); + -+void TemplateTable::wide_ret() { -+ transition(vtos, vtos); -+ locals_index_wide(x11); -+ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp -+ __ profile_ret(x11, x12); -+ __ ld(xbcp, Address(xmethod, Method::const_offset())); -+ __ add(xbcp, xbcp, x11); -+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); -+ __ dispatch_next(vtos, 0, /*generate_poll*/true); -+} ++// Helpers ++static void index_check(Register array, Register index); + -+void TemplateTable::tableswitch() { -+ Label default_case, continue_execution; -+ transition(itos, vtos); -+ // align xbcp -+ __ la(x11, at_bcp(BytesPerInt)); -+ __ andi(x11, x11, -BytesPerInt); -+ // load lo & hi -+ __ lwu(x12, Address(x11, BytesPerInt)); -+ __ lwu(x13, Address(x11, 2 * BytesPerInt)); -+ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ // check against lo & hi -+ __ blt(x10, x12, default_case); -+ __ bgt(x10, x13, default_case); -+ // lookup dispatch offset -+ __ subw(x10, x10, x12); -+ __ shadd(x13, x10, x11, t0, 2); -+ __ lwu(x13, Address(x13, 3 * BytesPerInt)); -+ __ profile_switch_case(x10, x11, x12); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ load_unsigned_byte(t0, Address(xbcp)); -+ __ dispatch_only(vtos, /*generate_poll*/true); -+ // handle default -+ __ bind(default_case); -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x11, 0)); -+ __ j(continue_execution); -+} ++#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +new file mode 100644 +index 0000000000..6c89133de0 +--- /dev/null ++++ 
b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+void TemplateTable::lookupswitch() { -+ transition(itos, itos); -+ __ stop("lookupswitch bytecode should have been rewritten"); -+} ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP + -+void TemplateTable::fast_linearswitch() { -+ transition(itos, vtos); -+ Label loop_entry, loop, found, continue_execution; -+ // bswap x10 so we can avoid bswapping the table entries -+ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend -+ // align xbcp -+ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of -+ // this instruction (change offsets -+ // below) -+ __ andi(x9, x9, -BytesPerInt); -+ // set counter -+ __ lwu(x11, Address(x9, BytesPerInt)); -+ __ revb_w(x11, x11); -+ __ j(loop_entry); -+ // table search -+ __ bind(loop); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lw(t0, Address(t0, 2 * BytesPerInt)); -+ __ beq(x10, t0, found); -+ __ bind(loop_entry); -+ __ addi(x11, x11, -1); -+ __ bgez(x11, loop); -+ // default case -+ __ profile_switch_default(x10); -+ __ lwu(x13, Address(x9, 0)); -+ __ j(continue_execution); -+ // entry found -> get offset -+ __ bind(found); -+ __ shadd(t0, x11, x9, t0, 3); -+ __ lwu(x13, Address(t0, 3 * BytesPerInt)); -+ __ profile_switch_case(x11, x10, x9); -+ // continue execution -+ __ bind(continue_execution); -+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend -+ __ add(xbcp, xbcp, x13); -+ __ lbu(t0, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); -+} ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. + -+void TemplateTable::fast_binaryswitch() { -+ transition(itos, vtos); -+ // Implementation using the following core algorithm: -+ // -+ // int binary_search(int key, LookupswitchPair* array, int n) -+ // binary_search start: -+ // #Binary search according to "Methodik des Programmierens" by -+ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
-+ // int i = 0; -+ // int j = n; -+ // while (i + 1 < j) do -+ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) -+ // # with Q: for all i: 0 <= i < n: key < a[i] -+ // # where a stands for the array and assuming that the (inexisting) -+ // # element a[n] is infinitely big. -+ // int h = (i + j) >> 1 -+ // # i < h < j -+ // if (key < array[h].fast_match()) -+ // then [j = h] -+ // else [i = h] -+ // end -+ // # R: a[i] <= key < a[i+1] or Q -+ // # (i.e., if key is within array, i is the correct index) -+ // return i -+ // binary_search end ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + -+ // Register allocation -+ const Register key = x10; // already set (tosca) -+ const Register array = x11; -+ const Register i = x12; -+ const Register j = x13; -+ const Register h = x14; -+ const Register temp = x15; ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+ // Find array start -+ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to -+ // get rid of this -+ // instruction (change -+ // offsets below) -+ __ andi(array, array, -BytesPerInt); ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+ // Initialize i & j -+ __ mv(i, zr); // i = 0 -+ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +new file mode 100644 +index 0000000000..6bdce51506 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // Convert j into native byteordering -+ __ revb_w(j, j); ++#include "precompiled.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" + -+ // And start -+ Label entry; -+ __ j(entry); ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + -+ // binary search loop -+ { -+ Label loop; -+ __ bind(loop); -+ __ addw(h, i, j); // h = i + j -+ __ srliw(h, h, 1); // h = (i + j) >> 1 -+ // if [key < array[h].fast_match()] -+ // then [j = h] -+ // else [i = h] -+ // Convert array[h].match to native byte-ordering before compare -+ __ shadd(temp, h, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } + -+ Label L_done, L_greater; -+ __ bge(key, temp, L_greater); -+ // if [key < array[h].fast_match()] then j = h -+ __ mv(j, h); -+ __ j(L_done); -+ __ bind(L_greater); -+ // if [key >= array[h].fast_match()] then i = h -+ __ mv(i, h); -+ __ bind(L_done); ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} + -+ // while [i + 1 < j] -+ __ bind(entry); -+ __ addiw(h, i, 1); // i + 1 -+ __ blt(h, j, loop); // i + 1 < j ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; + } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} + -+ // end of binary search, result index is i (must check again!) -+ Label default_case; -+ // Convert array[i].match to native byte-ordering before compare -+ __ shadd(temp, i, array, temp, 3); -+ __ ld(temp, Address(temp, 0)); -+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend -+ __ bne(key, temp, default_case); ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +new file mode 100644 +index 0000000000..711e4aeaf6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // entry found -> j = offset -+ __ shadd(temp, i, array, temp, 3); -+ __ lwu(j, Address(temp, BytesPerInt)); -+ __ profile_switch_case(i, key, array); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + -+ // default case -> j = default offset -+ __ bind(default_case); -+ __ profile_switch_default(i); -+ __ lwu(j, Address(array, -2 * BytesPerInt)); -+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + -+ __ add(temp, xbcp, j); -+ __ load_unsigned_byte(t0, Address(temp, 0)); ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); + -+ __ add(xbcp, xbcp, j); -+ __ la(xbcp, Address(xbcp, 0)); -+ __ dispatch_only(vtos, /*generate_poll*/true); -+} ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); + -+void TemplateTable::_return(TosState state) -+{ -+ transition(state, state); -+ assert(_desc->calls_vm(), -+ "inconsistent calls_vm information"); // call in remove_activation ++}; + -+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { -+ assert(state == vtos, "only valid state"); ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +new file mode 100644 +index 0000000000..0e8f526bd9 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
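The Dijkstra-style search described in the comment of the removed fast_binaryswitch hunk above can be read as the small standalone sketch below. It is illustrative only, not HotSpot code: the LookupswitchPair layout and the main() driver are assumptions, and only the loop mirrors the stated invariant (a[i] <= key < a[j], with a[n] treated as infinitely large, so the caller re-checks a[i].match afterwards, the "must check again!" step).

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for the interpreter's sorted (match, offset) pairs.
    struct LookupswitchPair {
      int32_t match;
      int32_t offset;
    };

    // Invariant: 0 <= i < j <= n and a[i] <= key < a[j], treating a[n] as +infinity.
    // Returns i; the caller still has to verify a[i].match == key.
    static int binary_search(int32_t key, const LookupswitchPair* a, int n) {
      int i = 0;
      int j = n;
      while (i + 1 < j) {
        int h = (i + j) >> 1;   // i < h < j
        if (key < a[h].match) {
          j = h;                // key lies in the lower half
        } else {
          i = h;                // key lies in the upper half (or matches)
        }
      }
      return i;
    }

    int main() {
      LookupswitchPair table[] = { {-5, 40}, {0, 52}, {7, 64}, {42, 76} };
      const int n = sizeof(table) / sizeof(table[0]);
      int i = binary_search(7, table, n);
      if (i < n && table[i].match == 7) {
        std::printf("key 7 dispatches to offset %d\n", table[i].offset);   // prints 64
      }
      return 0;
    }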
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ __ ld(c_rarg1, aaddress(0)); -+ __ load_klass(x13, c_rarg1); -+ __ lwu(x13, Address(x13, Klass::access_flags_offset())); -+ Label skip_register_finalizer; -+ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); -+ __ beqz(t0, skip_register_finalizer); ++#include "precompiled.hpp" ++#include "runtime/java.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++#include OS_HEADER_INLINE(os) + -+ __ bind(skip_register_finalizer); -+ } ++const char* VM_Version::_uarch = ""; ++const char* VM_Version::_vm_mode = ""; ++uint32_t VM_Version::_initial_vector_length = 0; + -+ // Issue a StoreStore barrier after all stores but before return -+ // from any constructor for any class with a final field. We don't -+ // know if this is a finalizer, so we always do so. -+ if (_desc->bytecode() == Bytecodes::_return) { -+ __ membar(MacroAssembler::StoreStore); ++void VM_Version::initialize() { ++ get_os_cpu_info(); ++ ++ // check if satp.mode is supported, currently supports up to SV48(RV64) ++ if (get_satp_mode() > VM_SV48) { ++ vm_exit_during_initialization( ++ err_msg("Unsupported satp mode: %s. Only satp modes up to sv48 are supported for now.", ++ _vm_mode)); + } + -+ // Narrow result if state is itos but result type is smaller. -+ // Need to narrow in the return bytecode rather than in generate_return_entry -+ // since compiled code callers expect the result to already be narrowed. -+ if (state == itos) { -+ __ narrow(x10); ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); + } + -+ __ remove_activation(state); -+ __ ret(); -+} ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ } + ++ if (UseAES || UseAESIntrinsics) { ++ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } + -+// ---------------------------------------------------------------------------- -+// Volatile variables demand their effects be made known to all CPU's -+// in order. 
Store buffers on most chips allow reads & writes to -+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode -+// without some kind of memory barrier (i.e., it's not sufficient that -+// the interpreter does not reorder volatile references, the hardware -+// also must not reorder them). -+// -+// According to the new Java Memory Model (JMM): -+// (1) All volatiles are serialized wrt to each other. ALSO reads & -+// writes act as aquire & release, so: -+// (2) A read cannot let unrelated NON-volatile memory refs that -+// happen after the read float up to before the read. It's OK for -+// non-volatile memory refs that happen before the volatile read to -+// float down below it. -+// (3) Similar a volatile write cannot let unrelated NON-volatile -+// memory refs that happen BEFORE the write float down to after the -+// write. It's OK for non-volatile memory refs that happen after the -+// volatile write to float up before it. -+// -+// We only put in barriers around volatile refs (they are expensive), -+// not _between_ memory refs (that would require us to track the -+// flavor of the previous memory refs). Requirements (2) and (3) -+// require some barriers before volatile stores and after volatile -+// loads. These nearly cover requirement (1) but miss the -+// volatile-store-volatile-load case. This final case is placed after -+// volatile-stores although it could just as well go before -+// volatile-loads. ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } + -+void TemplateTable::resolve_cache_and_index(int byte_no, -+ Register Rcache, -+ Register index, -+ size_t index_size) { -+ const Register temp = x9; -+ assert_different_registers(Rcache, index, temp); ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } + -+ Label resolved, clinit_barrier_slow; ++ if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } + -+ Bytecodes::Code code = bytecode(); -+ switch (code) { -+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; -+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; -+ default: break; ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + -+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); -+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); -+ __ mv(t0, (int) code); -+ __ beq(temp, t0, resolved); ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } + -+ // resolve first time through -+ // Class initialization barrier slow path lands here as well. 
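For readers less familiar with the membar flavors used around volatile field accesses, the barrier placement spelled out in the removed comment above maps fairly directly onto C++11 atomics. The sketch below is an analogy only (guard and payload are made-up names, and this is not HotSpot code): LoadLoad|LoadStore after a volatile read behaves like an acquire load, StoreStore|LoadStore before a volatile write like a release store, and the StoreLoad issued after a volatile write corresponds roughly to a full fence covering the volatile-store/volatile-load case.

    #include <atomic>
    #include <cassert>
    #include <thread>

    std::atomic<int> guard{0};   // plays the role of the volatile field
    int payload = 0;             // ordinary field published through 'guard'

    void writer() {
      payload = 42;                                  // plain store
      // StoreStore | LoadStore before the volatile store ~ release
      guard.store(1, std::memory_order_release);
      // StoreLoad after the volatile store: keeps a later volatile load
      // from floating above this store
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    void reader() {
      // LoadLoad | LoadStore after the volatile load ~ acquire
      if (guard.load(std::memory_order_acquire) == 1) {
        assert(payload == 42);                       // the publication is visible
      }
    }

    int main() {
      std::thread t1(writer);
      std::thread t2(reader);
      t1.join();
      t2.join();
      return 0;
    }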
-+ __ bind(clinit_barrier_slow); ++ if (UseCRC32Intrinsics) { ++ warning("CRC32 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } + -+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); -+ __ mv(temp, (int) code); -+ __ call_VM(noreg, entry, temp); ++ if (UseCRC32CIntrinsics) { ++ warning("CRC32C intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } + -+ // Update registers with resolved info -+ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); -+ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset -+ // so all clients ofthis method must be modified accordingly -+ __ bind(resolved); ++ if (UseRVV) { ++ if (!(_features & CPU_V)) { ++ warning("RVV is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVV, false); ++ } else { ++ // read vector length from vector CSR vlenb ++ _initial_vector_length = get_current_vector_length(); ++ } ++ } + -+ // Class initialization barrier for static methods -+ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -+ __ load_resolved_method_at_index(byte_no, temp, Rcache); -+ __ load_method_holder(temp, temp); -+ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); + } -+} + -+// The Rcache and index registers must be set before call -+// n.b unlike x86 cache already includes the index offset -+void TemplateTable::load_field_cp_cache_entry(Register obj, -+ Register cache, -+ Register index, -+ Register off, -+ Register flags, -+ bool is_static = false) { -+ assert_different_registers(cache, index, flags, off); -+ -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); -+ // Field offset -+ __ ld(off, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f2_offset()))); -+ // Flags -+ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); ++ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++ } + -+ // klass overwrite register -+ if (is_static) { -+ __ ld(obj, Address(cache, in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::f1_offset()))); -+ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); -+ __ ld(obj, Address(obj, mirror_offset)); -+ __ resolve_oop_handle(obj); ++ if (UseZbb) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } -+} + -+void TemplateTable::load_invoke_cp_cache_entry(int byte_no, -+ Register method, -+ Register itable_index, -+ Register flags, -+ bool is_invokevirtual, -+ bool is_invokevfinal, /*unused*/ -+ bool is_invokedynamic) { -+ // setup registers -+ const Register cache = t1; -+ const Register index = x14; -+ assert_different_registers(method, flags); -+ assert_different_registers(method, cache, index); -+ assert_different_registers(itable_index, flags); -+ assert_different_registers(itable_index, cache, index); -+ // determine constant pool cache field offsets -+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); -+ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + -+ (is_invokevirtual ? 
-+ ConstantPoolCacheEntry::f2_offset() : -+ ConstantPoolCacheEntry::f1_offset())); -+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()); -+ // access constant pool cache fields -+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()); ++ char buf[512]; ++ buf[0] = '\0'; ++ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); ++ strcat(buf, "rv64"); ++#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); ++ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) ++#undef ADD_FEATURE_IF_SUPPORTED + -+ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); -+ resolve_cache_and_index(byte_no, cache, index, index_size); -+ __ ld(method, Address(cache, method_offset)); ++ _features_string = os::strdup(buf); + -+ if (itable_index != noreg) { -+ __ ld(itable_index, Address(cache, index_offset)); -+ } -+ __ lwu(flags, Address(cache, flags_offset)); ++#ifdef COMPILER2 ++ c2_initialize(); ++#endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); +} + -+// The registers cache and index expected to be set before call. -+// Correct values of the cache and index registers are preserved. -+void TemplateTable::jvmti_post_field_access(Register cache, Register index, -+ bool is_static, bool has_tos) { -+ // do the JVMTI work here to avoid disturbing the register state below -+ // We use c_rarg registers here beacause we want to use the register used in -+ // the call to the VM -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. -+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); ++#ifdef COMPILER2 ++void VM_Version::c2_initialize() { ++ if (UseCMoveUnconditionally) { ++ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); ++ } + -+ __ beqz(x10, L1); ++ if (ConditionalMoveLimit > 0) { ++ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); ++ } + -+ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); -+ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); ++ } + -+ if (is_static) { -+ __ mv(c_rarg1, zr); // NULL object reference ++ if (!UseRVV && MaxVectorSize) { ++ FLAG_SET_DEFAULT(MaxVectorSize, 0); ++ } ++ ++ if (UseRVV) { ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = _initial_vector_length; ++ } else if (MaxVectorSize < 16) { ++ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); ++ UseRVV = false; ++ } else if (is_power_of_2(MaxVectorSize)) { ++ if (MaxVectorSize > _initial_vector_length) { ++ warning("Current system only supports max RVV vector length %d. 
Set MaxVectorSize to %d", ++ _initial_vector_length, _initial_vector_length); ++ } ++ MaxVectorSize = _initial_vector_length; + } else { -+ __ ld(c_rarg1, at_tos()); // get object pointer without popping it -+ __ verify_oop(c_rarg1); ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } -+ // c_rarg1: object pointer or NULL -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); + } -+} + -+void TemplateTable::pop_and_check_object(Register r) -+{ -+ __ pop_ptr(r); -+ __ null_check(r); // for field access must check obj. -+ __ verify_oop(r); -+} ++ // disable prefetch ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ } + -+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) -+{ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x14; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register raw_flags = x16; -+ const Register bc = x14; // uses same reg as obj, so don't mix them ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } + -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_access(cache, index, is_static, false); -+ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ } + -+ if (!is_static) { -+ // obj is on the stack -+ pop_and_check_object(obj); ++ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } + -+ __ add(off, obj, off); -+ const Address field(off); ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } + -+ Label Done, notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ } ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +new file mode 100644 +index 0000000000..875511f522 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
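The MaxVectorSize handling in c2_initialize() above reduces to a small policy: follow the hardware vector length read from vlenb when the flag is default, refuse requests below 16 bytes, clamp power-of-two requests to the hardware length, and treat anything else as fatal. The standalone model below is an assumption-laden sketch (resolve_max_vector_size, the intx typedef and the printed messages are illustrative, not HotSpot's API):

    #include <cstdio>

    typedef long intx;   // illustrative stand-in for HotSpot's intx

    static bool is_power_of_2(intx v) { return v > 0 && (v & (v - 1)) == 0; }

    // Returns the effective MaxVectorSize for a requested value and the
    // hardware vector length; may also turn RVV off, as the hunk above does.
    static intx resolve_max_vector_size(bool flag_is_default, intx requested,
                                        intx hw_length, bool* use_rvv) {
      if (flag_is_default) {
        return hw_length;                     // default: use what the hardware reports
      }
      if (requested < 16) {
        std::printf("warning: RVV needs at least 16-byte vectors, disabling RVV\n");
        *use_rvv = false;
        return requested;
      }
      if (is_power_of_2(requested)) {
        if (requested > hw_length) {
          std::printf("warning: clamping MaxVectorSize to %ld\n", (long)hw_length);
        }
        return hw_length;                     // always pinned to the hardware length
      }
      std::printf("error: unsupported MaxVectorSize %ld\n", (long)requested);
      return -1;                              // the real code exits during initialization
    }

    int main() {
      bool use_rvv = true;
      std::printf("resolved: %ld\n",
                  (long)resolve_max_vector_size(false, 256, 128, &use_rvv));   // 128
      return 0;
    }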
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_RISCV_HPP + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" + -+ // Dont't rewrite getstatic, only getfield -+ if (is_static) { -+ rc = may_not_rewrite; -+ } ++class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 + -+ // btos -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ __ push(btos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } -+ __ j(Done); ++// VM modes (satp.mode) privileged ISA 1.10 ++enum VM_MODE { ++ VM_MBARE = 0, ++ VM_SV39 = 8, ++ VM_SV48 = 9, ++ VM_SV57 = 10, ++ VM_SV64 = 11 ++}; + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++protected: ++ static const char* _uarch; ++ static const char* _vm_mode; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); ++ static VM_MODE get_satp_mode(); + -+ // ztos (same code as btos) -+ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ztos); -+ // Rewirte bytecode to be faster -+ if (rc == may_rewrite) { -+ // uses btos rewriting, no truncating to t/f bit is needed for getfield -+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); -+ } -+ __ j(Done); ++public: ++ // Initialization ++ static void initialize(); + -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); -+ // atos -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ push(atos); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); -+ } -+ __ j(Done); ++ enum Feature_Flag { ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(I, "i", 8) \ ++ decl(M, "m", 12) \ ++ decl(A, "a", 0) \ ++ decl(F, "f", 5) \ ++ decl(D, "d", 3) \ ++ decl(C, "c", 2) \ ++ decl(V, "v", 21) + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); -+ // itos -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ __ push(itos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); -+ } -+ __ j(Done); ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; + -+ __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); -+ // ctos -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ctos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); -+ } -+ __ j(Done); ++ static 
void initialize_cpu_information(void); ++}; + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); -+ // stos -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ __ push(stos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); -+ } -+ __ j(Done); ++#endif // CPU_RISCV_VM_VERSION_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +new file mode 100644 +index 0000000000..c4338715f9 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
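The CPU_FEATURE_FLAGS list declared in vm_version_riscv.hpp above is a classic X-macro: the same (id, letter, bit) table expands once into bitmask constants and once into the ISA feature string built in vm_version_riscv.cpp. A minimal self-contained illustration of that pattern follows; the buffer handling and the chosen feature set are assumptions, only the (letter, bit) pairs are taken from the hunk above.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // One list, reused for every expansion.
    #define CPU_FEATURE_FLAGS(decl) \
      decl(I, "i", 8)  \
      decl(M, "m", 12) \
      decl(A, "a", 0)  \
      decl(F, "f", 5)  \
      decl(D, "d", 3)  \
      decl(C, "c", 2)  \
      decl(V, "v", 21)

    // Expansion 1: bitmask constants (CPU_I = 1 << 8, ...).
    enum Feature_Flag {
    #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit),
      CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
    #undef DECLARE_CPU_FEATURE_FLAG
    };

    int main() {
      uint64_t features = CPU_I | CPU_M | CPU_A | CPU_F | CPU_D | CPU_C;   // rv64imafdc
      char buf[64];
      std::strcpy(buf, "rv64");
      // Expansion 2: append the letter of every detected extension.
    #define ADD_FEATURE_IF_SUPPORTED(id, name, bit) \
      if (features & CPU_##id) std::strcat(buf, name);
      CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED)
    #undef ADD_FEATURE_IF_SUPPORTED
      std::printf("%s\n", buf);   // prints rv64imafdc
      return 0;
    }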
++ * ++ */ + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); -+ // ltos -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ __ push(ltos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++void VMRegImpl::set_regName() { ++ int i = 0; ++ Register reg = ::as_Register(0); ++ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); + } -+ __ j(Done); + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); -+ // ftos -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(ftos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); + } -+ __ j(Done); + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif -+ // dtos -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ __ push(dtos); -+ // Rewrite bytecode to be faster -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { ++ regName[i] = "NON-GPR-FPR"; + } -+#ifdef ASSERT -+ __ j(Done); ++} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +new file mode 100644 +index 0000000000..6f613a8f11 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ __ bind(notDouble); -+ __ stop("Bad state"); -+#endif ++#ifndef CPU_RISCV_VMREG_RISCV_HPP ++#define CPU_RISCV_VMREG_RISCV_HPP + -+ __ bind(Done); ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} + -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + -+void TemplateTable::getfield(int byte_no) -+{ -+ getfield_or_static(byte_no, false); ++inline Register as_Register() { ++ assert(is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + -+void TemplateTable::nofast_getfield(int byte_no) { -+ getfield_or_static(byte_no, false, may_not_rewrite); ++inline FloatRegister as_FloatRegister() { ++ assert(is_FloatRegister() && is_even(value()), "must be"); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); +} + -+void TemplateTable::getstatic(int byte_no) -+{ -+ getfield_or_static(byte_no, true); ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ return is_even(value()); +} + -+// The registers cache and index expected to be set before call. -+// The function may destroy various registers, just not the cache and index registers. -+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { -+ transition(vtos, vtos); ++#endif // CPU_RISCV_VMREG_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +new file mode 100644 +index 0000000000..06b70020b4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. 
-+ Label L1; -+ assert_different_registers(cache, index, x10); -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(x10, Address(t0, offset)); -+ __ beqz(x10, L1); -+ -+ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); -+ -+ if (is_static) { -+ // Life is simple. Null out the object pointer. -+ __ mv(c_rarg1, zr); -+ } else { -+ // Life is harder. The stack holds the value on top, followed by -+ // the object. We don't know the size of the value, though; it -+ // could be one or two words depending on its type. As a result, -+ // we must find the type to determine where the object is. -+ __ lwu(c_rarg3, Address(c_rarg2, -+ in_bytes(cp_base_offset + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); -+ ConstantPoolCacheEntry::verify_tos_state_shift(); -+ Label nope2, done, ok; -+ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue -+ __ sub(t0, c_rarg3, ltos); -+ __ beqz(t0, ok); -+ __ sub(t0, c_rarg3, dtos); -+ __ bnez(t0, nope2); -+ __ bind(ok); -+ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); -+ __ bind(nope2); -+ } -+ // cache entry pointer -+ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); -+ // object (tos) -+ __ mv(c_rarg3, esp); -+ // c_rarg1: object pointer set up above (NULL if static) -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ c_rarg1, c_rarg2, c_rarg3); -+ __ get_cache_and_index_at_bcp(cache, index, 1); -+ __ bind(L1); ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { ++ return VMRegImpl::Bad(); + } ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + -+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { -+ transition(vtos, vtos); -+ -+ const Register cache = x12; -+ const Register index = x13; -+ const Register obj = x12; -+ const Register off = x9; -+ const Register flags = x10; -+ const Register bc = x14; ++inline VMReg FloatRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} + -+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); -+ jvmti_post_field_mod(cache, index, is_static); -+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++inline VMReg VectorRegisterImpl::as_VMReg() const { ++ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_fpr); ++} + -+ Label Done; -+ __ mv(x15, flags); ++#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +new file mode 100644 +index 0000000000..448bb09ba7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
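Taken together, vmreg_riscv.hpp and vmreg_riscv.inline.hpp above define a flat slot numbering: every general-purpose register occupies a fixed number of VMReg slots, the floating-point registers follow, and conversion in either direction is a divide or multiply by the slots-per-register constant. The toy model below illustrates that mapping; the slot and register counts are assumptions (the real ConcreteRegisterImpl also accounts for vector registers and extra slots), so treat it as a sketch of the scheme, not the port's exact values.

    #include <cassert>
    #include <cstdio>

    const int slots_per_gpr = 2;   // assumed: one 64-bit register = two 32-bit slots
    const int slots_per_fpr = 2;
    const int num_gprs = 32;
    const int num_fprs = 32;
    const int max_gpr = num_gprs * slots_per_gpr;            // first FPR slot
    const int max_fpr = max_gpr + num_fprs * slots_per_fpr;  // first non-GPR/FPR slot

    bool is_gpr(int value) { return value >= 0 && value < max_gpr; }
    bool is_fpr(int value) { return value >= max_gpr && value < max_fpr; }

    int as_gpr(int value)  { assert(is_gpr(value)); return value / slots_per_gpr; }
    int as_fpr(int value)  { assert(is_fpr(value)); return (value - max_gpr) / slots_per_fpr; }

    int gpr_to_vmreg(int r) { return r * slots_per_gpr; }              // as_VMReg for GPRs
    int fpr_to_vmreg(int r) { return r * slots_per_fpr + max_gpr; }    // as_VMReg for FPRs

    int main() {
      std::printf("x10 -> slot %d -> x%d\n", gpr_to_vmreg(10), as_gpr(gpr_to_vmreg(10)));
      std::printf("f0  -> slot %d (is_fpr=%d)\n", fpr_to_vmreg(0), (int)is_fpr(fpr_to_vmreg(0)));
      return 0;
    }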
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "assembler_riscv.inline.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_riscv.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/instanceKlass.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif + -+ Label notByte, notBool, notInt, notShort, notChar, -+ notLong, notFloat, notObj, notDouble; ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code + -+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + -+ ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++#define __ masm-> + -+ assert(btos == 0, "change code, btos != 0"); -+ __ bnez(flags, notByte); ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif + -+ // Don't rewrite putstatic, only putfield -+ if (is_static) { -+ rc = may_not_rewrite; ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; + } + -+ // btos -+ { -+ __ pop(btos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); // off register as temparator register. -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; + -+ __ bind(notByte); -+ __ sub(t0, flags, (u1)ztos); -+ __ bnez(t0, notBool); ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); + -+ // ztos -+ { -+ __ pop(ztos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ increment(Address(t2)); + } ++#endif + -+ __ bind(notBool); -+ __ sub(t0, flags, (u1)atos); -+ __ bnez(t0, notObj); ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + -+ // atos -+ { -+ __ pop(atos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ // Store into the field -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t2, j_rarg0); + -+ __ bind(notObj); -+ __ sub(t0, flags, (u1)itos); -+ __ bnez(t0, notInt); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ start_pc = __ pc(); + -+ // itos -+ { -+ __ pop(itos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ // check offset vs vtable length ++ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); ++ __ mvw(t1, vtable_index * vtableEntry::size()); ++ __ bgt(t0, t1, L); ++ __ enter(); ++ __ mv(x12, vtable_index); + -+ __ bind(notInt); -+ __ sub(t0, flags, (u1)ctos); -+ __ bnez(t0, notChar); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); ++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); + -+ // ctos -+ { -+ __ pop(ctos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
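The slop_bytes / slop_delta bookkeeping above is the sizing discipline described in share/code/vtableStubs.hpp: every variable-length instruction sequence is compared against a conservative estimate, and the unused bytes are carried forward so the whole stub stays inside its fixed code-size budget. A minimal sketch of that accounting follows, under invented names (SlopTracker and the sample numbers are not HotSpot code):

    #include <cassert>
    #include <cstdio>

    struct SlopTracker {
      int slop_bytes = 0;

      // Record one variable-length sequence: 'estimate' must cover 'emitted';
      // the surplus is banked against later sequences in the same stub.
      void record(int estimate, int emitted) {
        int slop_delta = estimate - emitted;
        assert(slop_delta >= 0 && "code size estimate too small");
        slop_bytes += slop_delta;
      }
    };

    int main() {
      SlopTracker t;
      t.record(16, 4);     // e.g. lookup_virtual_method emitted a single ld
      t.record(256, 180);  // e.g. a call_VM sequence under DebugVtables
      std::printf("unused bytes carried forward: %d\n", t.slop_bytes);
      return 0;
    }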
-+ const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++ __ leave(); ++ __ bind(L); + } ++#endif // PRODUCT + -+ __ bind(notChar); -+ __ sub(t0, flags, (u1)stos); -+ __ bnez(t0, notShort); ++ start_pc = __ pc(); ++ __ lookup_virtual_method(t2, vtable_index, xmethod); ++ // lookup_virtual_method generates ++ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld ++ // 1 instruction (best case):ld * 1 ++ slop_delta = 16 - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + -+ // stos -+ { -+ __ pop(stos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beqz(xmethod, L); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); + } ++#endif // PRODUCT + -+ __ bind(notShort); -+ __ sub(t0, flags, (u1)ltos); -+ __ bnez(t0, notLong); ++ // x10: receiver klass ++ // xmethod: Method* ++ // x12: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); + -+ // ltos -+ { -+ __ pop(ltos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); -+ } ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); + -+ __ bind(notLong); -+ __ sub(t0, flags, (u1)ftos); -+ __ bnez(t0, notFloat); ++ return s; ++} + -+ // ftos -+ { -+ __ pop(ftos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); -+ } -+ __ j(Done); ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; + } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; + -+ __ bind(notFloat); -+#ifdef ASSERT -+ __ sub(t0, flags, (u1)dtos); -+ __ bnez(t0, notDouble); -+#endif ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); + -+ // dtos -+ { -+ __ pop(dtos); -+ // field address -+ if (!is_static) { -+ pop_and_check_object(obj); -+ } -+ __ add(off, obj, off); // if static, obj from cache, else obj from stack. -+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); -+ if (rc == may_rewrite) { -+ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); -+ } ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ increment(Address(x18)); + } -+ -+#ifdef ASSERT -+ __ j(Done); -+ -+ __ bind(notDouble); -+ __ stop("Bad state"); +#endif + -+ __ bind(Done); ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + -+ { -+ Label notVolatile; -+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); -+ } -+} ++ // Entry arguments: ++ // t1: CompiledICHolder ++ // j_rarg0: Receiver + -+void TemplateTable::putfield(int byte_no) -+{ -+ putfield_or_static(byte_no, false); -+} ++ // This stub is called from compiled code which has no callee-saved registers, ++ // so all registers except arguments are free at this point. ++ const Register recv_klass_reg = x18; ++ const Register holder_klass_reg = x19; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) ++ const Register temp_reg = x28; ++ const Register temp_reg2 = x29; ++ const Register icholder_reg = t1; + -+void TemplateTable::nofast_putfield(int byte_no) { -+ putfield_or_static(byte_no, false, may_not_rewrite); -+} ++ Label L_no_such_interface; + -+void TemplateTable::putstatic(int byte_no) { -+ putfield_or_static(byte_no, true); -+} ++ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); ++ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + -+void TemplateTable::jvmti_post_fast_field_mod() -+{ -+ if (JvmtiExport::can_post_field_modification()) { -+ // Check to see if a field modification watch has been set before -+ // we take the time to call into the VM. -+ Label L2; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); -+ __ lwu(c_rarg3, Address(t0, offset)); -+ __ beqz(c_rarg3, L2); -+ __ pop_ptr(x9); // copy the object pointer from tos -+ __ verify_oop(x9); -+ __ push_ptr(x9); // put the object pointer back on tos -+ // Save tos values before call_VM() clobbers them. Since we have -+ // to do it for every data type, we use the saved values as the -+ // jvalue object. 
-+ switch (bytecode()) { // load values into the jvalue object -+ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ push_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ push_d(); break; -+ case Bytecodes::_fast_fputfield: __ push_f(); break; -+ case Bytecodes::_fast_lputfield: __ push_l(x10); break; ++ start_pc = __ pc(); + -+ default: -+ ShouldNotReachHere(); -+ } -+ __ mv(c_rarg3, esp); // points to jvalue on the stack -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); -+ __ verify_oop(x9); -+ // x9: object pointer copied above -+ // c_rarg2: cache entry pointer -+ // c_rarg3: jvalue object on the stack -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_modification), -+ x9, c_rarg2, c_rarg3); ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(recv_klass_reg, j_rarg0); + -+ switch (bytecode()) { // restore tos values -+ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; -+ case Bytecodes::_fast_bputfield: // fall through -+ case Bytecodes::_fast_zputfield: // fall through -+ case Bytecodes::_fast_sputfield: // fall through -+ case Bytecodes::_fast_cputfield: // fall through -+ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; -+ case Bytecodes::_fast_dputfield: __ pop_d(); break; -+ case Bytecodes::_fast_fputfield: __ pop_f(); break; -+ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; -+ default: break; -+ } -+ __ bind(L2); -+ } -+} ++ // Receiver subtype check against REFC. ++ __ lookup_interface_method(// inputs: rec. class, interface ++ recv_klass_reg, resolved_klass_reg, noreg, ++ // outputs: scan temp. reg1, scan temp. reg2 ++ temp_reg2, temp_reg, ++ L_no_such_interface, ++ /*return_method=*/false); + -+void TemplateTable::fast_storefield(TosState state) -+{ -+ transition(state, vtos); ++ const ptrdiff_t typecheckSize = __ pc() - start_pc; ++ start_pc = __ pc(); + -+ ByteSize base = ConstantPoolCache::base_offset(); ++ // Get selected method from declaring class and itable index ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ recv_klass_reg, holder_klass_reg, itable_index, ++ // outputs: method, scan temp. reg ++ xmethod, temp_reg, ++ L_no_such_interface); + -+ jvmti_post_fast_field_mod(); ++ const ptrdiff_t lookupSize = __ pc() - start_pc; + -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); ++ // Reduce "estimate" such that "padding" does not drop below 8. 
++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = typecheckSize + lookupSize; ++ slop_delta = (int)(estimate - codesize); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); -+ -+ // test for volatile with x13 -+ __ lwu(x13, Address(x12, in_bytes(base + -+ ConstantPoolCacheEntry::flags_offset()))); -+ -+ // replace index with field offset from cache entry -+ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); -+ -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); -+ __ bind(notVolatile); ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L2; ++ __ beqz(xmethod, L2); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L2); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L2); + } ++#endif // ASSERT + -+ // Get object from stack -+ pop_and_check_object(x12); ++ // xmethod: Method* ++ // j_rarg0: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); + -+ // field address -+ __ add(x11, x12, x11); -+ const Address field(x11, 0); ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_aputfield: -+ do_oop_store(_masm, field, x10, IN_HEAP); -+ break; -+ case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); + -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); -+ __ bind(notVolatile); -+ } ++ return s; +} + -+void TemplateTable::fast_accessfield(TosState state) -+{ -+ transition(atos, state); -+ // Do the JVMTI work here to avoid disturbing the register 
state below -+ if (JvmtiExport::can_post_field_access()) { -+ // Check to see if a field access watch has been set before we -+ // take the time to call into the VM. -+ Label L1; -+ int32_t offset = 0; -+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); -+ __ lwu(x12, Address(t0, offset)); -+ __ beqz(x12, L1); -+ // access constant pool cache entry -+ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); -+ __ verify_oop(x10); -+ __ push_ptr(x10); // save object pointer before call_VM() clobbers it -+ __ mv(c_rarg1, x10); -+ // c_rarg1: object pointer copied above -+ // c_rarg2: cache entry pointer -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::post_field_access), -+ c_rarg1, c_rarg2); -+ __ pop_ptr(x10); // restore object pointer -+ __ bind(L1); -+ } -+ -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x11, 1); -+ -+ // Must prevent reordering of the following cp cache loads with bytecode load -+ __ membar(MacroAssembler::LoadLoad); -+ -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ -+ // x10: object -+ __ verify_oop(x10); -+ __ null_check(x10); -+ __ add(x11, x10, x11); -+ const Address field(x11, 0); -+ -+ // access field -+ switch (bytecode()) { -+ case Bytecodes::_fast_agetfield: -+ do_oop_load(_masm, field, x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case Bytecodes::_fast_lgetfield: -+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_igetfield: -+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case Bytecodes::_fast_bgetfield: -+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_sgetfield: -+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_cgetfield: -+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_fgetfield: -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); -+ break; -+ case Bytecodes::_fast_dgetfield: -+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++int VtableStub::pd_code_alignment() { ++ // RISCV cache line size is not an architected constant. We just align on word size. 
++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; +} +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 2842a11f92..208a374eea 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { + strncpy(cpuinfo, "IA64", length); + #elif defined(PPC) + strncpy(cpuinfo, "PPC64", length); ++#elif defined(RISCV) ++ strncpy(cpuinfo, "RISCV64", length); + #elif defined(S390) + strncpy(cpuinfo, "S390", length); + #elif defined(SPARC) +@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +new file mode 100644 +index 0000000000..f2610af6cd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+void TemplateTable::fast_xaccess(TosState state) -+{ -+ transition(vtos, state); -+ -+ // get receiver -+ __ ld(x10, aaddress(0)); -+ // access constant pool cache -+ __ get_cache_and_index_at_bcp(x12, x13, 2); -+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::f2_offset()))); -+ -+ // make sure exception is reported in correct bcp range (getfield is -+ // next instruction) -+ __ addi(xbcp, xbcp, 1); -+ __ null_check(x10); -+ switch (state) { -+ case itos: -+ __ add(x10, x10, x11); -+ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); -+ __ addw(x10, x10, zr); // signed extended -+ break; -+ case atos: -+ __ add(x10, x10, x11); -+ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); -+ __ verify_oop(x10); -+ break; -+ case ftos: -+ __ add(x10, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } ++// nothing required here +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +new file mode 100644 +index 0000000000..4a1ebee8b0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -0,0 +1,189 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ bind(notVolatile); -+ } ++#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+ __ sub(xbcp, xbcp, 1); -+} ++#include "runtime/vm_version.hpp" + -+//----------------------------------------------------------------------------- -+// Calls ++// Implementation of class atomic + -+void TemplateTable::prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index, // itable index, MethodType, etc. 
-+ Register recv, // if caller wants to see it -+ Register flags // if caller wants to test it -+ ) { -+ // determine flags -+ const Bytecodes::Code code = bytecode(); -+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; -+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; -+ const bool is_invokehandle = code == Bytecodes::_invokehandle; -+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; -+ const bool is_invokespecial = code == Bytecodes::_invokespecial; -+ const bool load_receiver = (recv != noreg); -+ const bool save_flags = (flags != noreg); -+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); -+ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); -+ assert(flags == noreg || flags == x13, ""); -+ assert(recv == noreg || recv == x12, ""); ++// Note that memory_order_conservative requires a full barrier after atomic stores. ++// See https://patchwork.kernel.org/patch/3575821/ + -+ // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; -+ } -+ if (flags == noreg) { -+ flags = x13; -+ } -+ assert_different_registers(method, index, recv, flags); ++#if defined(__clang_major__) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2)) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#endif + -+ // save 'interpreter return address' -+ __ save_bcp(); ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + -+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++template ++struct Atomic::PlatformAdd ++ : Atomic::FetchAndAdd > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add add and fetch for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif + -+ // maybe push appendix to arguments (just before return address) -+ if (is_invokedynamic || is_invokehandle) { -+ Label L_no_push; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); -+ __ beqz(t0, L_no_push); -+ // Push the appendix as a trailing parameter. -+ // This must be done before we get the receiver, -+ // since the parameter_size includes it. -+ __ push_reg(x9); -+ __ mv(x9, index); -+ __ load_resolved_reference_at_index(index, x9); -+ __ pop_reg(x9); -+ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
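// Illustrative sketch (not part of the patch): the PlatformAdd specialization
// above realizes HotSpot's "conservative" memory order on RISC-V as a
// releasing read-modify-write followed by a full fence (FULL_MEM_BARRIER is
// __sync_synchronize()).  A minimal standalone equivalent using the same
// GCC/Clang builtins; the function name is illustrative only.
#include <cstdint>

inline int64_t conservative_add_and_fetch(volatile int64_t* dest, int64_t add_value) {
  int64_t result = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
  __sync_synchronize();  // trailing full fence, as FULL_MEM_BARRIER does above
  return result;
}
// fetch-and-add then falls out as: conservative_add_and_fetch(dest, v) - v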
-+ __ bind(L_no_push); ++ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); ++ FULL_MEM_BARRIER; ++ return res; + } + -+ // load receiver if needed (note: no return address pushed yet) -+ if (load_receiver) { -+ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 -+ __ shadd(t0, recv, esp, t0, 3); -+ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); -+ __ verify_oop(recv); ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; + } ++}; + -+ // compute return type -+ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); -+ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); + -+ // load return address -+ { -+ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); -+ __ mv(t0, table_addr); -+ __ shadd(t0, t1, t0, t1, 3); -+ __ ld(ra, Address(t0, 0)); ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; + } -+} + -+void TemplateTable::invokevirtual_helper(Register index, -+ Register recv, -+ Register flags) -+{ -+ // Uses temporary registers x10, x13 -+ assert_different_registers(index, recv, x10, x13); -+ // Test for an invoke of a final method -+ Label notFinal; -+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notFinal); -+ -+ const Register method = index; // method must be xmethod -+ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); ++ uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3))); ++ int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24 + -+ // do the call - the index is actually the method to call -+ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* ++ uint64_t mask = 0xfful << shift; // 0x00000000..FF.. ++ uint64_t remask = ~mask; // 0xFFFFFFFF..00.. + -+ // It's final, need a null check here! -+ __ null_check(recv); ++ uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC.. ++ uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE.. + -+ // profile this call -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, method, x14, true); ++ uint64_t old_value; ++ uint64_t rc_temp; + -+ __ jump_from_interpreted(method); ++ __asm__ __volatile__ ( ++ "1: lr.w %0, %2 \n\t" ++ " and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX.. 
++ " bne %1, %3, 2f \n\t" // compare 64-bit w_cv ++ " and %1, %0, %6 \n\t" // remove old byte ++ " or %1, %1, %4 \n\t" // add new byte ++ " sc.w %1, %1, %2 \n\t" // store new word ++ " bnez %1, 1b \n\t" ++ "2: \n\t" ++ : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst) ++ : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask) ++ : "memory" ); + -+ __ bind(notFinal); ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } + -+ // get receiver klass -+ __ null_check(recv, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x10, recv); ++ return (T)((old_value & mask) >> shift); ++} ++#endif + -+ // profile this call -+ __ profile_virtual_call(x10, xlocals, x13); ++template ++template ++inline T Atomic::PlatformXchg::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add xchg for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif + -+ // get target Method & entry point -+ __ lookup_virtual_method(x10, index, method); -+ __ profile_arguments_type(x13, method, x14, true); -+ __ jump_from_interpreted(method); ++ STATIC_ASSERT(byte_size == sizeof(T)); ++ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); ++ FULL_MEM_BARRIER; ++ return res; +} + -+void TemplateTable::invokevirtual(int byte_no) -+{ -+ transition(vtos, vtos); -+ assert(byte_no == f2_byte, "use this argument"); -+ -+ prepare_invoke(byte_no, xmethod, noreg, x12, x13); ++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. ++template ++template ++inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { + -+ // xmethod: index (actually a Method*) -+ // x12: receiver -+ // x13: flags ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ STATIC_ASSERT(byte_size >= 4); ++#endif + -+ invokevirtual_helper(xmethod, x12, x13); -+} ++ STATIC_ASSERT(byte_size == sizeof(T)); ++ T value = compare_value; ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } + -+void TemplateTable::invokespecial(int byte_no) -+{ -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false, ++ __ATOMIC_RELAXED, __ATOMIC_RELAXED); + -+ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* -+ x12); // get receiver also for null check -+ __ verify_oop(x12); -+ __ null_check(x12); -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, xbcp, false); -+ __ jump_from_interpreted(xmethod); ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ return value; +} + -+void TemplateTable::invokestatic(int byte_no) -+{ -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this arugment"); -+ -+ prepare_invoke(byte_no, xmethod); // get f1 Method* -+ // do the call -+ __ profile_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, false); -+ __ jump_from_interpreted(xmethod); ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ T rv; ++ int tmp; ++ __asm volatile( ++ "1:\n\t" ++ " addiw %[tmp], %[cv], 0\n\t" // make 
sure compare_value signed_extend ++ " lr.w.aq %[rv], (%[dest])\n\t" ++ " bne %[rv], %[tmp], 2f\n\t" ++ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t" ++ " bnez %[tmp], 1b\n\t" ++ "2:\n\t" ++ : [rv] "=&r" (rv), [tmp] "=&r" (tmp) ++ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) ++ : "memory"); ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ return rv; +} + -+void TemplateTable::fast_invokevfinal(int byte_no) -+{ -+ __ call_Unimplemented(); -+} -+ -+void TemplateTable::invokeinterface(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); -+ -+ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* -+ x12, x13); // recv, flags -+ -+ // x10: interface klass (from f1) -+ // xmethod: method (from f2) -+ // x12: receiver -+ // x13: flags -+ -+ // First check for Object case, then private interface method, -+ // then regular interface method. -+ -+ // Special case of invokeinterface called for virtual method of -+ // java.lang.Object. See cpCache.cpp for details -+ Label notObjectMethod; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); -+ __ beqz(t0, notObjectMethod); -+ -+ invokevirtual_helper(xmethod, x12, x13); -+ __ bind(notObjectMethod); -+ -+ Label no_such_interface; -+ -+ // Check for private method invocation - indicated by vfinal -+ Label notVFinal; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); -+ __ beqz(t0, notVFinal); -+ -+ // Check receiver klass into x13 - also a null check -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); -+ -+ Label subtype; -+ __ check_klass_subtype(x13, x10, x14, subtype); -+ // If we get here the typecheck failed -+ __ j(no_such_interface); -+ __ bind(subtype); -+ -+ __ profile_final_call(x10); -+ __ profile_arguments_type(x10, xmethod, x14, true); -+ __ jump_from_interpreted(xmethod); -+ -+ __ bind(notVFinal); -+ -+ // Get receiver klass into x13 - also a null check -+ __ restore_locals(); -+ __ null_check(x12, oopDesc::klass_offset_in_bytes()); -+ __ load_klass(x13, x12); -+ -+ Label no_such_method; -+ -+ // Preserve method for the throw_AbstractMethodErrorVerbose. -+ __ mv(x28, xmethod); -+ // Receiver subtype check against REFC. -+ // Superklass in x10. Subklass in x13. Blows t1, x30 -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ x13, x10, noreg, -+ // outputs: scan temp. reg, scan temp. reg -+ t1, x30, -+ no_such_interface, -+ /*return_method=*/false); ++#undef FULL_COMPILER_ATOMIC_SUPPORT ++#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +new file mode 100644 +index 0000000000..28868c7640 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
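// Illustrative sketch (not part of the patch): the lr.w/sc.w loop in
// PlatformCmpxchg<1> above emulates a one-byte compare-and-swap, for
// toolchains without sub-word __atomic support, by operating on the aligned
// 32-bit word that contains the byte.  The same idea expressed with GCC
// builtins (little-endian byte numbering, as on RISC-V; names illustrative;
// the ordering fences the patch adds for non-relaxed orders are omitted).
#include <cstdint>

inline uint8_t cas_byte(uint8_t* dest, uint8_t compare_value, uint8_t exchange_value) {
  uint32_t* aligned = reinterpret_cast<uint32_t*>(reinterpret_cast<uintptr_t>(dest) & ~uintptr_t(3));
  const unsigned shift = 8u * unsigned(reinterpret_cast<uintptr_t>(dest) -
                                       reinterpret_cast<uintptr_t>(aligned));
  const uint32_t mask = 0xffu << shift;
  uint32_t old_word = __atomic_load_n(aligned, __ATOMIC_RELAXED);
  for (;;) {
    const uint8_t old_byte = uint8_t((old_word & mask) >> shift);
    if (old_byte != compare_value) {
      return old_byte;                          // our byte differs: the CAS fails
    }
    const uint32_t new_word = (old_word & ~mask) | (uint32_t(exchange_value) << shift);
    // On failure old_word is refreshed with the current word and we retry:
    // either our byte changed (handled above) or a neighbouring byte raced.
    if (__atomic_compare_exchange_n(aligned, &old_word, new_word,
                                    /*weak=*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
      return compare_value;                     // success: the byte was swapped
    }
  }
}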
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // profile this call -+ __ profile_virtual_call(x13, x30, x9); ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + -+ // Get declaring interface class from method, and itable index -+ __ load_method_holder(x10, xmethod); -+ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); -+ __ subw(xmethod, xmethod, Method::itable_index_max); -+ __ negw(xmethod, xmethod); ++#include + -+ // Preserve recvKlass for throw_AbstractMethodErrorVerbose -+ __ mv(xlocals, x13); -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ xlocals, x10, xmethod, -+ // outputs: method, scan temp. reg -+ xmethod, x30, -+ no_such_interface); ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. ++inline u2 Bytes::swap_u2(u2 x) { ++ return bswap_16(x); ++} + -+ // xmethod: Method to call -+ // x12: receiver -+ // Check for abstract method error -+ // Note: This should be done more efficiently via a throw_abstract_method_error -+ // interpreter entry point and a conditional jump to it in case of a null -+ // method. -+ __ beqz(xmethod, no_such_method); ++inline u4 Bytes::swap_u4(u4 x) { ++ return bswap_32(x); ++} + -+ __ profile_arguments_type(x13, xmethod, x30, true); ++inline u8 Bytes::swap_u8(u8 x) { ++ return bswap_64(x); ++} + -+ // do the call -+ // x12: receiver -+ // xmethod: Method -+ __ jump_from_interpreted(xmethod); -+ __ should_not_reach_here(); ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +new file mode 100644 +index 0000000000..bdf36d6b4c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // exception handling code follows ... -+ // note: must restore interpreter registers to canonical -+ // state for exception handling to work correctly! ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + -+ __ bind(no_such_method); -+ // throw exception -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} + -+ __ bind(no_such_interface); -+ // throw exceptiong -+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) -+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) -+ // Pass arguments for generating a verbose error message. -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); -+ // the call_VM checks for exception, so we should never return here. -+ __ should_not_reach_here(); -+ return; ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } +} + -+void TemplateTable::invokehandle(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} + -+ prepare_invoke(byte_no, xmethod, x10, x12); -+ __ verify_method_ptr(x12); -+ __ verify_oop(x12); -+ __ null_check(x12); ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} + -+ // FIXME: profile the LambdaForm also ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} + -+ // x30 is safe to use here as a temp reg because it is about to -+ // be clobbered by jump_from_interpreted(). 
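// Illustrative sketch (not part of the patch): pd_disjoint_words_atomic()
// above copies element-by-element so that every HeapWord is read and written
// with a single full-word access (a plain memcpy may use byte or vector
// operations and can tear an individual word).  A standalone sketch of the
// same guarantee using relaxed atomic word accesses; the name is illustrative.
#include <cstddef>
#include <cstdint>

inline void copy_words_atomically(const uintptr_t* from, uintptr_t* to, size_t count) {
  for (size_t i = 0; i < count; i++) {
    const uintptr_t word = __atomic_load_n(&from[i], __ATOMIC_RELAXED);  // whole-word load
    __atomic_store_n(&to[i], word, __ATOMIC_RELAXED);                    // whole-word store
  }
}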
-+ __ profile_final_call(x30); -+ __ profile_arguments_type(x30, xmethod, x14, true); ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} + -+ __ jump_from_interpreted(xmethod); ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); +} + -+void TemplateTable::invokedynamic(int byte_no) { -+ transition(vtos, vtos); -+ assert(byte_no == f1_byte, "use this argument"); ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} + -+ prepare_invoke(byte_no, xmethod, x10); ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} + -+ // x10: CallSite object (from cpool->resolved_references[]) -+ // xmethod: MH.linkToCallSite method (from f2) ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} + -+ // Note: x10_callsite is already pushed by prepare_invoke ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} + -+ // %%% should make a type profile for any invokedynamic that takes a ref argument -+ // profile this call -+ __ profile_call(xbcp); -+ __ profile_arguments_type(x13, xmethod, x30, false); ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} + -+ __ verify_oop(x10); ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} + -+ __ jump_from_interpreted(xmethod); ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); +} + -+//----------------------------------------------------------------------------- -+// Allocation ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} + -+void TemplateTable::_new() { -+ transition(vtos, atos); ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} + -+ __ get_unsigned_2_byte_index_at_bcp(x13, 1); -+ Label slow_case; -+ Label done; -+ Label initialize_header; -+ Label initialize_object; // including clearing the fields ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +new file mode 100644 +index 0000000000..297414bfcd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ __ get_cpool_and_tags(x14, x10); -+ // Make sure the class we're about to instantiate has been resolved. -+ // This is done before loading InstanceKlass to be consistent with the order -+ // how Constant Pool is update (see ConstantPool::klass_at_put) -+ const int tags_offset = Array::base_offset_in_bytes(); -+ __ add(t0, x10, x13); -+ __ la(t0, Address(t0, tags_offset)); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(t0, t0); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); -+ __ bnez(t1, slow_case); ++#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP + -+ // get InstanceKlass -+ __ load_resolved_klass_at_offset(x14, x13, x14, t0); ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) + -+ // make sure klass is initialized & doesn't have finalizer -+ // make sure klass is fully initialized -+ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); -+ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); -+ __ bnez(t1, slow_case); ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); + -+ // get instance_size in InstanceKlass (scaled to a count of bytes) -+ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); -+ // test to see if it has a finalizer or is malformed in some way -+ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); -+ __ bnez(t0, slow_case); ++define_pd_global(intx, CompilerThreadStackSize, 2048); + -+ // Allocate the instance: -+ // If TLAB is enabled: -+ // Try to allocate in the TLAB. -+ // If fails, go to the slow path. -+ // Else If inline contiguous allocations are enabled: -+ // Try to allocate in eden. -+ // If fails due to heap end, go to slow path -+ // -+ // If TLAB is enabled OR inline contiguous is enabled: -+ // Initialize the allocation. -+ // Exit. -+ // Go to slow path. 
-+ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); ++define_pd_global(uintx, JVMInvokeMethodSlack, 8192); + -+ if (UseTLAB) { -+ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx, HeapBaseMinAddress, 2 * G); + -+ if (ZeroTLAB) { -+ // the fields have been already cleared -+ __ j(initialize_header); -+ } else { -+ // initialize both the header and fields -+ __ j(initialize_object); -+ } -+ } else { -+ // Allocation in the shared Eden, if allowed. -+ // -+ // x13: instance size in bytes -+ if (allow_shared_alloc) { -+ __ eden_allocate(x10, x13, 0, x28, slow_case); -+ } -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +new file mode 100644 +index 0000000000..5b5d35553f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ // If USETLAB or allow_shared_alloc are true, the object is created above and -+ // there is an initialized need. Otherwise, skip and go to the slow path. -+ if (UseTLAB || allow_shared_alloc) { -+ // The object is initialized before the header. If the object size is -+ // zero, go directly to the header initialization. -+ __ bind(initialize_object); -+ __ sub(x13, x13, sizeof(oopDesc)); -+ __ beqz(x13, initialize_header); ++#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP + -+ // Initialize obejct fields -+ { -+ __ add(x12, x10, sizeof(oopDesc)); -+ Label loop; -+ __ bind(loop); -+ __ sd(zr, Address(x12)); -+ __ add(x12, x12, BytesPerLong); -+ __ sub(x13, x13, BytesPerLong); -+ __ bnez(x13, loop); -+ } ++// Included in orderAccess.hpp header file. + -+ // initialize object hader only. 
-+ __ bind(initialize_header); -+ __ mv(t0, (intptr_t)markWord::prototype().value()); -+ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); -+ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops -+ __ store_klass(x10, x14); // store klass last ++#include "runtime/vm_version.hpp" + -+ { -+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); -+ // Trigger dtrace event for fastpath -+ __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); -+ __ pop(atos); // restore the return value -+ } -+ __ j(done); -+ } ++// Implementation of class OrderAccess. + -+ // slow case -+ __ bind(slow_case); -+ __ get_constant_pool(c_rarg1); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); -+ __ verify_oop(x10); ++inline void OrderAccess::loadload() { acquire(); } ++inline void OrderAccess::storestore() { release(); } ++inline void OrderAccess::loadstore() { acquire(); } ++inline void OrderAccess::storeload() { fence(); } + -+ // continue -+ __ bind(done); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++inline void OrderAccess::acquire() { ++ READ_MEM_BARRIER; +} + -+void TemplateTable::newarray() { -+ transition(itos, atos); -+ __ load_unsigned_byte(c_rarg1, at_bcp(1)); -+ __ mv(c_rarg2, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), -+ c_rarg1, c_rarg2); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. -+ __ membar(MacroAssembler::StoreStore); ++inline void OrderAccess::release() { ++ WRITE_MEM_BARRIER; +} + -+void TemplateTable::anewarray() { -+ transition(itos, atos); -+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); -+ __ get_constant_pool(c_rarg1); -+ __ mv(c_rarg3, x10); -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), -+ c_rarg1, c_rarg2, c_rarg3); -+ // Must prevent reordering of stores for object initialization with stores that publish the new object. 
-+ __ membar(MacroAssembler::StoreStore); ++inline void OrderAccess::fence() { ++ FULL_MEM_BARRIER; +} + -+void TemplateTable::arraylength() { -+ transition(atos, itos); -+ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); -+ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); -+} + -+void TemplateTable::checkcast() ++template ++struct OrderAccess::PlatformOrderedLoad +{ -+ transition(atos, atos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } ++}; + -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); -+ -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ j(resolved); -+ -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass -+ -+ __ bind(resolved); -+ __ load_klass(x9, x13); -+ -+ // Generate subtype check. Blows x12, x15. Object in x13. -+ // Superklass in x10. Subklass in x9. -+ __ gen_subtype_check(x9, ok_is_subtype); -+ -+ // Come here on failure -+ __ push_reg(x13); -+ // object is at TOS -+ __ j(Interpreter::_throw_ClassCastException_entry); -+ -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ mv(x10, x13); // Restore object in x13 -+ -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); -+ } else { -+ __ bind(is_null); // same as 'done' -+ } -+ __ bind(done); -+} -+ -+void TemplateTable::instanceof() { -+ transition(atos, itos); -+ Label done, is_null, ok_is_subtype, quicked, resolved; -+ __ beqz(x10, is_null); -+ -+ // Get cpool & tags index -+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array -+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index -+ // See if bytecode has already been quicked -+ __ add(t0, x13, Array::base_offset_in_bytes()); -+ __ add(x11, t0, x9); -+ __ membar(MacroAssembler::AnyAny); -+ __ lbu(x11, x11); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); -+ __ beqz(t0, quicked); -+ -+ __ push(atos); // save receiver for result, and for GC -+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); -+ // vm_result_2 has metadata result -+ __ get_vm_result_2(x10, xthread); -+ __ pop_reg(x13); // restore receiver -+ __ verify_oop(x13); -+ __ load_klass(x13, x13); -+ __ j(resolved); -+ -+ // Get superklass in x10 and subklass in x13 -+ __ bind(quicked); -+ __ load_klass(x13, x10); -+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); -+ -+ __ bind(resolved); -+ -+ // Generate subtype check. Blows x12, x15 -+ // Superklass in x10. Subklass in x13. 
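// Illustrative sketch (not part of the patch): the OrderAccess mapping above
// pairs loadload/loadstore with an acquire fence, storestore with a release
// fence, and storeload with a full fence.  Roughly the same mapping in C++11
// terms, as an approximation only (the patch uses the __atomic_thread_fence
// and __sync_synchronize builtins directly); names are illustrative.
#include <atomic>

inline void order_loadload()   { std::atomic_thread_fence(std::memory_order_acquire); } // READ_MEM_BARRIER
inline void order_loadstore()  { std::atomic_thread_fence(std::memory_order_acquire); }
inline void order_storestore() { std::atomic_thread_fence(std::memory_order_release); } // WRITE_MEM_BARRIER
inline void order_storeload()  { std::atomic_thread_fence(std::memory_order_seq_cst); } // FULL_MEM_BARRIER (approx.)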
-+ __ gen_subtype_check(x13, ok_is_subtype); -+ -+ // Come here on failure -+ __ mv(x10, zr); -+ __ j(done); -+ // Come here on success -+ __ bind(ok_is_subtype); -+ __ li(x10, 1); -+ -+ // Collect counts on whether this test sees NULLs a lot or not. -+ if (ProfileInterpreter) { -+ __ j(done); -+ __ bind(is_null); -+ __ profile_null_seen(x12); -+ } else { -+ __ bind(is_null); // same as 'done' -+ } -+ __ bind(done); -+ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass -+ // x10 = 1: obj != NULL and obj is an instanceof the specified klass -+} -+ -+//----------------------------------------------------------------------------- -+// Breakpoints -+void TemplateTable::_breakpoint() { -+ // Note: We get here even if we are single stepping.. -+ // jbug inists on setting breakpoints at every bytecode -+ // even if we are in single step mode. -+ -+ transition(vtos, vtos); -+ -+ // get the unpatched byte code -+ __ get_method(c_rarg1); -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::get_original_bytecode_at), -+ c_rarg1, xbcp); -+ __ mv(x9, x10); -+ -+ // post the breakpoint event -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), -+ xmethod, xbcp); -+ -+ // complete the execution of original bytecode -+ __ mv(t0, x9); -+ __ dispatch_only_normal(vtos); -+} -+ -+//----------------------------------------------------------------------------- -+// Exceptions -+ -+void TemplateTable::athrow() { -+ transition(atos, vtos); -+ __ null_check(x10); -+ __ j(Interpreter::throw_exception_entry()); -+} -+ -+//----------------------------------------------------------------------------- -+// Synchronization -+// -+// Note: monitorenter & exit are symmetric routines; which is reflected -+// in the assembly code structure as well -+// -+// Stack layout: -+// -+// [expressions ] <--- esp = expression stack top -+// .. -+// [expressions ] -+// [monitor entry] <--- monitor block top = expression stack bot -+// .. -+// [monitor entry] -+// [frame data ] <--- monitor block bot -+// ... 
-+// [saved fp ] <--- fp -+void TemplateTable::monitorenter() -+{ -+ transition(atos, vtos); -+ -+ // check for NULL object -+ __ null_check(x10); -+ -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ -+ Label allocated; -+ -+ // initialize entry pointer -+ __ mv(c_rarg1, zr); // points to free slot or NULL -+ -+ // find a free slot in the monitor block (result in c_rarg1) -+ { -+ Label entry, loop, exit, notUsed; -+ __ ld(c_rarg3, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom -+ -+ __ j(entry); -+ -+ __ bind(loop); -+ // check if current entry is used -+ // if not used then remember entry in c_rarg1 -+ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); -+ __ bnez(t0, notUsed); -+ __ mv(c_rarg1, c_rarg3); -+ __ bind(notUsed); -+ // check if current entry is for same object -+ // if same object then stop searching -+ __ beq(x10, t0, exit); -+ // otherwise advance to next entry -+ __ add(c_rarg3, c_rarg3, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg3, c_rarg2, loop); -+ __ bind(exit); -+ } -+ -+ __ bnez(c_rarg1, allocated); // check if a slot has been found and -+ // if found, continue with that on -+ -+ // allocate one if there's no free slot -+ { -+ Label entry, loop; -+ // 1. compute new pointers // esp: old expression stack top -+ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom -+ __ sub(esp, esp, entry_size); // move expression stack top -+ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom -+ __ mv(c_rarg3, esp); // set start value for copy loop -+ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom -+ __ sub(sp, sp, entry_size); // make room for the monitor -+ -+ __ j(entry); -+ // 2. move expression stack contents -+ __ bind(loop); -+ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack -+ // word from old location -+ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location -+ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word -+ __ bind(entry); -+ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom -+ // then copy next word -+ } -+ -+ // call run-time routine -+ // c_rarg1: points to monitor entry -+ __ bind(allocated); -+ -+ // Increment bcp to point to the next bytecode, so exception -+ // handling for async. exceptions work correctly. -+ // The object has already been poped from the stack, so the -+ // expression stack looks correct. -+ __ addi(xbcp, xbcp, 1); -+ -+ // store object -+ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ __ lock_object(c_rarg1); -+ -+ // check to make sure this monitor doesn't cause stack overflow after locking -+ __ save_bcp(); // in case of exception -+ __ generate_stack_overflow_check(0); -+ -+ // The bcp has already been incremented. Just need to dispatch to -+ // next instruction. 
-+ __ dispatch_next(vtos); -+} -+ -+void TemplateTable::monitorexit() ++template ++struct OrderAccess::PlatformOrderedStore +{ -+ transition(atos, vtos); -+ -+ // check for NULL object -+ __ null_check(x10); -+ -+ const Address monitor_block_top( -+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); -+ const Address monitor_block_bot( -+ fp, frame::interpreter_frame_initial_sp_offset * wordSize); -+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; -+ -+ Label found; -+ -+ // find matching slot -+ { -+ Label entry, loop; -+ __ ld(c_rarg1, monitor_block_top); // points to current entry, -+ // starting with top-most entry -+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom -+ // of monitor block -+ __ j(entry); -+ -+ __ bind(loop); -+ // check if current entry is for same object -+ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); -+ // if same object then stop searching -+ __ beq(x10, t0, found); -+ // otherwise advance to next entry -+ __ add(c_rarg1, c_rarg1, entry_size); -+ __ bind(entry); -+ // check if bottom reached -+ // if not at bottom then check this entry -+ __ bne(c_rarg1, c_rarg2, loop); -+ } -+ -+ // error handling. Unlocking was not block-structured -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::throw_illegal_monitor_state_exception)); -+ __ should_not_reach_here(); -+ -+ // call run-time routine -+ __ bind(found); -+ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) -+ __ unlock_object(c_rarg1); -+ __ pop_ptr(x10); // discard object -+} ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } ++}; + -+// Wide instructions -+void TemplateTable::wide() ++template ++struct OrderAccess::PlatformOrderedStore +{ -+ __ load_unsigned_byte(x9, at_bcp(1)); -+ __ mv(t0, (address)Interpreter::_wentry_point); -+ __ shadd(t0, x9, t0, t1, 3); -+ __ ld(t0, Address(t0)); -+ __ jr(t0); -+} ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } ++}; + -+// Multi arrays -+void TemplateTable::multianewarray() { -+ transition(vtos, atos); -+ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions -+ // last dim is on top of stack; we want address of first one: -+ // first_addr = last_addr + (ndims - 1) * wordSize -+ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); -+ __ sub(c_rarg1, c_rarg1, wordSize); -+ call_VM(x10, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), -+ c_rarg1); -+ __ load_unsigned_byte(x11, at_bcp(3)); -+ __ shadd(esp, x11, esp, t0, 3); -+} -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp ++#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 00000000000..fcc86108d28 +index 0000000000..8b772892b4 --- /dev/null -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp -@@ -0,0 +1,42 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -0,0 +1,624 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55715,480 +52535,614 @@ index 00000000000..fcc86108d28 + * + */ + -+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP -+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP -+ -+static void prepare_invoke(int byte_no, -+ Register method, // linked method (or i-klass) -+ Register index = noreg, // itable index, MethodType, etc. -+ Register recv = noreg, // if caller wants to see it -+ Register flags = noreg // if caller wants to test it -+ ); -+static void invokevirtual_helper(Register index, Register recv, -+ Register flags); ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeCache.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm.h" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" + -+// Helpers -+static void index_check(Register array, Register index); ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include + -+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -new file mode 100644 -index 00000000000..4f50adb05c3 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ ++#define REG_LR 1 ++#define REG_FP 8 + -+#include "precompiled.hpp" -+#include "prims/universalNativeInvoker.hpp" -+#include "utilities/debug.hpp" ++NOINLINE address os::current_stack_pointer() { ++ return (address)__builtin_frame_address(0); ++} + -+address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ return (char*) -1; +} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -new file mode 100644 -index 00000000000..ce70da72f2e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#include "precompiled.hpp" -+#include "prims/universalUpcallHandler.hpp" -+#include "utilities/debug.hpp" ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__gregs[REG_PC]; ++} + -+address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -+ Unimplemented(); -+ return nullptr; ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; +} + -+address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -+ ShouldNotCallThis(); -+ return nullptr; ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; +} + -+bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -+ return false; ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} -diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -new file mode 100644 -index 00000000000..6c89133de02 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ + -+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP -+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP -+ -+// These are the CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. -+ -+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) -+ -+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) -+ -+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) -+ -+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) -+ -+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -new file mode 100644 -index 00000000000..768c7633ca6 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,230 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "runtime/java.hpp" -+#include "runtime/os.hpp" -+#include "runtime/vm_version.hpp" -+#include "utilities/formatBuffer.hpp" -+#include "utilities/macros.hpp" -+ -+#include OS_HEADER_INLINE(os) ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). ++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + -+const char* VM_Version::_uarch = ""; -+uint32_t VM_Version::_initial_vector_length = 0; ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); + -+void VM_Version::initialize() { -+ get_os_cpu_info(); ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} + -+ if (FLAG_IS_DEFAULT(UseFMA)) { -+ FLAG_SET_DEFAULT(UseFMA, true); -+ } ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { + -+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); -+ } ++ ExtendedPC epc; ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; + -+ if (UseAES || UseAESIntrinsics) { -+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { -+ warning("AES instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAES, false); ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp != NULL) { ++ *ret_sp = os::Linux::ucontext_get_sp(uc); + } -+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { -+ warning("AES intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ if (ret_fp != NULL) { ++ *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp != NULL) { ++ *ret_sp = (intptr_t *)NULL; ++ } ++ if (ret_fp != NULL) { ++ *ret_fp = (intptr_t *)NULL; + } + } + -+ if (UseAESCTRIntrinsics) { -+ warning("AES/CTR intrinsics are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); -+ } ++ return epc; ++} + -+ if (UseSHA) { -+ warning("SHA instructions are not available on this CPU"); -+ FLAG_SET_DEFAULT(UseSHA, false); -+ } ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* frame_sp = NULL; ++ intptr_t* frame_fp = NULL; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); ++} + -+ if (UseSHA1Intrinsics) { -+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. 
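++    // The sp/fp/pc captured in the ucontext describe that interpreter frame
++    // directly, so it is safe to step to its Java sender below.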
++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } + } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} + -+ if (UseSHA256Intrinsics) { -+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); -+ } ++// By default, gcc always saves frame pointer rfp on this stack. This ++// may get turned off by -fomit-frame-pointer. ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} + -+ if (UseSHA512Intrinsics) { -+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++NOINLINE frame os::current_frame() { ++ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); ++ if (sender_sp != NULL) { ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ sender_sp[frame::link_offset], ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++ } else { ++ ShouldNotReachHere(); ++ return frame(); + } ++} + -+ if (UseSHA3Intrinsics) { -+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -+ } ++// Utility functions ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; + -+ if (UseCRC32Intrinsics) { -+ warning("CRC32 intrinsics are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); -+ } ++ Thread* t = Thread::current_or_null_safe(); + -+ if (UseCRC32CIntrinsics) { -+ warning("CRC32C intrinsics are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); -+ } ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); + -+ if (UseMD5Intrinsics) { -+ warning("MD5 intrinsics are not available on this CPU."); -+ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -+ } ++ SignalHandlerMark shm(t); + -+ if (UseRVV) { -+ if (!(_features & CPU_V)) { -+ warning("RVV is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVV, false); ++ // Note: it's not uncommon that JNI 
code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; + } else { -+ // read vector length from vector CSR vlenb -+ _initial_vector_length = get_current_vector_length(); ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; + } + } + -+ if (UseRVB && !(_features & CPU_B)) { -+ warning("RVB is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVB, false); -+ } -+ -+ if (UseRVC && !(_features & CPU_C)) { -+ warning("RVC is not supported on this CPU"); -+ FLAG_SET_DEFAULT(UseRVC, false); ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } + } ++#endif + -+ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { -+ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } + } + -+ if (UseRVB) { -+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; + } -+ } else { -+ FLAG_SET_DEFAULT(UsePopCountInstruction, false); + } + -+ char buf[512]; -+ buf[0] = '\0'; -+ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); -+ strcat(buf, "rv64"); -+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); -+ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) -+#undef ADD_FEATURE_IF_SUPPORTED ++ // decide if this trap can be handled by a stub ++ address stub = NULL; + -+ _features_string = os::strdup(buf); ++ address pc = NULL; + -+#ifdef COMPILER2 -+ c2_initialize(); -+#endif // COMPILER2 -+} ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); + -+#ifdef COMPILER2 -+void VM_Version::c2_initialize() { -+ if (UseCMoveUnconditionally) { -+ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); -+ } ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; + -+ if (ConditionalMoveLimit > 0) { -+ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); -+ } ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ 
SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); -+ } ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } + -+ if (!UseRVV && MaxVectorSize) { -+ FLAG_SET_DEFAULT(MaxVectorSize, 0); -+ } ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub + -+ if (!UseRVV) { -+ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -+ } ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if ((sig == SIGILL || sig == SIGTRAP) ++ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (TraceTraps) { ++ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); ++ } ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE && ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ stub = ++ SharedRuntime:: ++ continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime:: ++ IMPLICIT_DIVIDE_BY_ZERO); ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } + -+ if (UseRVV) { -+ if (FLAG_IS_DEFAULT(MaxVectorSize)) { -+ MaxVectorSize = _initial_vector_length; -+ } else if (MaxVectorSize < 16) { -+ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); -+ UseRVV = false; -+ } else if (is_power_of_2(MaxVectorSize)) { -+ if (MaxVectorSize > _initial_vector_length) { -+ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", -+ _initial_vector_length, _initial_vector_length); ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } -+ MaxVectorSize = _initial_vector_length; -+ } else { -+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); + } -+ } + -+ // disable prefetch -+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { -+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. 
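++      // Returning from the handler then retries the faulting access.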
++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + -+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); -+ } ++ if (stub != NULL) { ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + -+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; + } + -+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { -+ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; + } + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; + } + -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); + } -+} -+#endif // COMPILER2 + -+void VM_Version::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); + -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler +} -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -new file mode 100644 -index 00000000000..8e35530359a ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,72 @@ -+/* -+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_RISCV_HPP ++void os::Linux::init_thread_fpu_state(void) { ++} + -+#include "runtime/abstract_vm_version.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/globals_extension.hpp" -+#include "utilities/sizes.hpp" -+ -+class VM_Version : public Abstract_VM_Version { -+#ifdef COMPILER2 -+private: -+ static void c2_initialize(); -+#endif // COMPILER2 ++int os::Linux::get_fpu_control_word(void) { ++ return 0; ++} + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} + -+public: -+ // Initialization -+ static void initialize(); ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack + -+ constexpr static bool supports_stack_watermark_barrier() { return true; } ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; + -+ enum Feature_Flag { -+#define CPU_FEATURE_FLAGS(decl) \ -+ decl(I, "i", 8) \ -+ decl(M, "m", 12) \ -+ decl(A, "a", 0) \ -+ decl(F, "f", 5) \ -+ decl(D, "d", 3) \ -+ decl(C, "c", 2) \ -+ decl(V, "v", 21) \ -+ decl(B, "b", 1) ++// return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); ++ return s; ++} + -+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), -+ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) -+#undef DECLARE_CPU_FEATURE_FLAG -+ }; ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler + -+ static void initialize_cpu_information(void); ++static const char* reg_abi_names[] = { ++ "pc", ++ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", ++ "x5(t0)", "x6(t1)", "x7(t2)", ++ "x8(s0)", "x9(s1)", ++ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", ++ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", ++ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" +}; + -+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; ++ } ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ for (int r = 0; r < 32; r++) { ++ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); ++ print_location(st, uc->uc_mcontext.__gregs[r]); ++ } ++ st->cr(); ++ ++ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); ++ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. 
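++  // Dump raw code bytes one at a time; with the C extension, RISC-V
++  // instructions can be 2 or 4 bytes, so no fixed width is assumed here.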
++ address pc = os::Linux::ucontext_get_pc(uc); ++ print_instructions(st, pc, sizeof(char)); ++ st->cr(); ++} ++ ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; ++ } ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ ++ // this is horrendously verbose but the layout of the registers in the ++ // context does not match how we defined our abstract Register set, so ++ // we can't just iterate through the gregs area ++ ++ // this is only for the "general purpose" registers ++ ++ for (int r = 0; r < 32; r++) ++ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); ++ st->cr(); ++} ++ ++void os::setup_fpu() { ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ return 0; ++} ++ ++extern "C" { ++ int SpinPause() { ++ return 0; ++ } ++ ++ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ if (from > to) { ++ const jshort *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jshort *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } ++ } ++ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ if (from > to) { ++ const jint *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jint *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } ++ } ++ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ if (from > to) { ++ const jlong *end = from + count; ++ while (from < end) { ++ os::atomic_copy64(from++, to++); ++ } ++ } else if (from < to) { ++ const jlong *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ os::atomic_copy64(from--, to--); ++ } ++ } ++ } ++ ++ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count); ++ } ++ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 2); ++ } ++ void _Copy_arrayof_conjoint_jints(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 4); ++ } ++ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 8); ++ } ++}; +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 00000000000..aa7222dc64a +index 0000000000..f3e3a73bc5 --- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,64 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +@@ -0,0 +1,40 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -56211,54 +53165,74 @@ index 00000000000..aa7222dc64a + * + */ + -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "code/vmreg.hpp" ++#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP + -+void VMRegImpl::set_regName() { -+ int i = 0; -+ Register reg = ::as_Register(0); -+ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); -+ } -+ reg = reg->successor(); -+ } ++ static void setup_fpu(); + -+ FloatRegister freg = ::as_FloatRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); -+ } -+ freg = freg->successor(); -+ } ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -+ regName[i++] = reg->name(); -+ } -+ vreg = vreg->successor(); ++ // Atomically copy 64 bits of data ++ static void atomic_copy64(const volatile void *src, volatile void *dst) { ++ *(jlong *) dst = *(const jlong *) src; + } + -+ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; -+ } ++#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +new file mode 100644 +index 0000000000..2bd48e09c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++ ++#include "runtime/prefetch.hpp" ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { +} + -+VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -+ Unimplemented(); -+ return VMRegImpl::Bad(); ++inline void Prefetch::write(void *loc, intx interval) { +} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp ++ ++#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp new file mode 100644 -index 00000000000..9e611b1f671 +index 0000000000..ffcd819487 --- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,68 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +@@ -0,0 +1,77 @@ +/* -+ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56281,58 +53255,67 @@ index 00000000000..9e611b1f671 + * + */ + -+#ifndef CPU_RISCV_VMREG_RISCV_HPP -+#define CPU_RISCV_VMREG_RISCV_HPP ++#include "precompiled.hpp" ++#include "logging/log.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/debug.hpp" + -+inline bool is_Register() { -+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; -+} ++#include ++#include + -+inline bool is_FloatRegister() { -+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; -+} ++#define check_with_errno(check_type, cond, msg) \ ++ do { \ ++ int err = errno; \ ++ check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err), \ ++ os::errno_name(err)); \ ++} while (false) + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} ++#define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) ++#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) + -+inline Register as_Register() { -+ assert(is_Register(), "must be"); -+ return ::as_Register(value() / RegisterImpl::max_slots_per_register); -+} ++#ifndef NR_riscv_flush_icache ++#ifndef NR_arch_specific_syscall ++#define NR_arch_specific_syscall 244 ++#endif ++#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15) ++#endif + -+inline FloatRegister as_FloatRegister() { -+ assert(is_FloatRegister() && is_even(value()), "must be"); -+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / -+ FloatRegisterImpl::max_slots_per_register); -+} ++#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL ++#define SYS_RISCV_FLUSH_ICACHE_ALL 0UL + -+inline VectorRegister as_VectorRegister() { -+ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); ++static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) { ++ return syscall(NR_riscv_flush_icache, 
start, end, flags); +} + -+inline bool is_concrete() { -+ assert(is_reg(), "must be"); -+ if (is_VectorRegister()) { -+ int base = value() - ConcreteRegisterImpl::max_fpr; -+ return (base % VectorRegisterImpl::max_slots_per_register) == 0; -+ } else { -+ return is_even(value()); ++bool RiscvFlushIcache::test() { ++ ATTRIBUTE_ALIGNED(64) char memory[64]; ++ long ret = sys_flush_icache((uintptr_t)&memory[0], ++ (uintptr_t)&memory[sizeof(memory) - 1], ++ SYS_RISCV_FLUSH_ICACHE_ALL); ++ if (ret == 0) { ++ return true; + } ++ int err = errno; \ ++ log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)", ++ os::strerror(err), os::errno_name(err)); ++ return false; +} + -+#endif // CPU_RISCV_VMREG_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp ++void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) { ++ long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL); ++ guarantee_with_errno(ret == 0, "riscv_flush_icache failed"); ++} +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp new file mode 100644 -index 00000000000..06b70020b4b +index 0000000000..f4e7263b39 --- /dev/null -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,46 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +@@ -0,0 +1,39 @@ +/* -+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -56355,37 +53338,29 @@ index 00000000000..06b70020b4b + * + */ + -+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -+ -+inline VMReg RegisterImpl::as_VMReg() const { -+ if (this == noreg) { -+ return VMRegImpl::Bad(); -+ } -+ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); -+} ++#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP + -+inline VMReg FloatRegisterImpl::as_VMReg() const { -+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_gpr); -+} ++#include "memory/allocation.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/growableArray.hpp" + -+inline VMReg VectorRegisterImpl::as_VMReg() const { -+ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + -+ ConcreteRegisterImpl::max_fpr); -+} ++class RiscvFlushIcache: public AllStatic { ++ public: ++ static bool test(); ++ static void flush(uintptr_t start, uintptr_t end); ++}; + -+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP -diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp ++#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 00000000000..78b81138003 +index 0000000000..ccceed643e --- /dev/null -+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp -@@ -0,0 +1,260 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -0,0 +1,100 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -56409,340 +53384,161 @@ index 00000000000..78b81138003 + */ + +#include "precompiled.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "assembler_riscv.inline.hpp" -+#include "code/vtableStubs.hpp" -+#include "interp_masm_riscv.hpp" -+#include "memory/resourceArea.hpp" -+#include "oops/compiledICHolder.hpp" -+#include "oops/instanceKlass.hpp" -+#include "oops/klassVtable.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "vmreg_riscv.inline.hpp" -+#ifdef COMPILER2 -+#include "opto/runtime.hpp" -+#endif ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" + -+// machine-dependent part of VtableStubs: create VtableStub of correct size and -+// initialize its code ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++} + -+#define __ masm-> ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { + -+#ifndef PRODUCT -+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); -+#endif ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} + -+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(true); -+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); -+ // Can be NULL if there is no free space in the code cache. -+ if (s == NULL) { -+ return NULL; -+ } ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} + -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); ++ return true; ++ } + -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. 
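++  // The ucontext is only useful if the thread was interrupted in Java code;
++  // otherwise there is nothing to recover and we report failure below.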
++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ add_memory_int64(Address(t2), 1); -+ } -+#endif ++ intptr_t* ret_fp = NULL; ++ intptr_t* ret_sp = NULL; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } + -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } + -+ // get receiver klass -+ address npe_addr = __ pc(); -+ __ load_klass(t2, j_rarg0); ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(this)) { ++#ifdef COMPILER2 ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(this)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } + -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ start_pc = __ pc(); ++ // nothing else to try ++ return false; ++} + -+ // check offset vs vtable length -+ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); -+ __ mvw(t1, vtable_index * vtableEntry::size()); -+ __ bgt(t0, t1, L); -+ __ enter(); -+ __ mv(x12, vtable_index); ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +new file mode 100644 +index 0000000000..4b91fa855a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); -+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = __ pc() - start_pc; -+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP + -+ __ leave(); -+ __ bind(L); ++ private: ++ void pd_initialize() { ++ _anchor.clear(); + } -+#endif // PRODUCT + -+ start_pc = __ pc(); -+ __ lookup_virtual_method(t2, vtable_index, xmethod); -+ // lookup_virtual_method generates -+ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld -+ // 1 instruction (best case):ld * 1 -+ slop_delta = 16 - (int)(__ pc() - start_pc); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); -+ -+#ifndef PRODUCT -+ if (DebugVtables) { -+ Label L; -+ __ beqz(xmethod, L); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L); -+ __ stop("Vtable entry is NULL"); -+ __ bind(L); -+ } -+#endif // PRODUCT -+ -+ // x10: receiver klass -+ // xmethod: Method* -+ // x12: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); -+ -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++ frame pd_last_frame(); + -+ return s; -+} ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + -+VtableStub* VtableStubs::create_itable_stub(int itable_index) { -+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. -+ const int stub_code_length = code_size_limit(false); -+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); -+ // Can be NULL if there is no free space in the code cache. -+ if (s == NULL) { -+ return NULL; ++ void set_base_of_stack_pointer(intptr_t* base_sp) { + } -+ // Count unused bytes in instruction sequences of variable size. -+ // We add them to the computed buffer size in order to avoid -+ // overflow in subsequently generated stubs. -+ address start_pc = NULL; -+ int slop_bytes = 0; -+ int slop_delta = 0; -+ -+ ResourceMark rm; -+ CodeBuffer cb(s->entry_point(), stub_code_length); -+ MacroAssembler* masm = new MacroAssembler(&cb); -+ assert_cond(masm != NULL); + -+#if (!defined(PRODUCT) && defined(COMPILER2)) -+ if (CountCompiledCalls) { -+ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ add_memory_int64(Address(x18), 1); ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } -+#endif -+ -+ // get receiver (need to skip return address on top of stack) -+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); -+ -+ // Entry arguments: -+ // t2: CompiledICHolder -+ // j_rarg0: Receiver -+ -+ // This stub is called from compiled code which has no callee-saved registers, -+ // so all registers except arguments are free at this point. 
-+ const Register recv_klass_reg = x18; -+ const Register holder_klass_reg = x19; // declaring interface klass (DECC) -+ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) -+ const Register temp_reg = x28; -+ const Register temp_reg2 = x29; -+ const Register icholder_reg = t1; -+ -+ Label L_no_such_interface; -+ -+ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); -+ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); -+ -+ start_pc = __ pc(); -+ -+ // get receiver klass (also an implicit null-check) -+ address npe_addr = __ pc(); -+ __ load_klass(recv_klass_reg, j_rarg0); -+ -+ // Receiver subtype check against REFC. -+ __ lookup_interface_method(// inputs: rec. class, interface -+ recv_klass_reg, resolved_klass_reg, noreg, -+ // outputs: scan temp. reg1, scan temp. reg2 -+ temp_reg2, temp_reg, -+ L_no_such_interface, -+ /*return_method=*/false); -+ -+ const ptrdiff_t typecheckSize = __ pc() - start_pc; -+ start_pc = __ pc(); -+ -+ // Get selected method from declaring class and itable index -+ __ lookup_interface_method(// inputs: rec. class, interface, itable index -+ recv_klass_reg, holder_klass_reg, itable_index, -+ // outputs: method, scan temp. reg -+ xmethod, temp_reg, -+ L_no_such_interface); -+ -+ const ptrdiff_t lookupSize = __ pc() - start_pc; -+ -+ // Reduce "estimate" such that "padding" does not drop below 8. -+ const ptrdiff_t estimate = 256; -+ const ptrdiff_t codesize = typecheckSize + lookupSize; -+ slop_delta = (int)(estimate - codesize); -+ slop_bytes += slop_delta; -+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + -+#ifdef ASSERT -+ if (DebugVtables) { -+ Label L2; -+ __ beqz(xmethod, L2); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ bnez(t0, L2); -+ __ stop("compiler entrypoint is null"); -+ __ bind(L2); ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { + } -+#endif // ASSERT -+ -+ // xmethod: Method* -+ // j_rarg0: receiver -+ address ame_addr = __ pc(); -+ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); -+ __ jr(t0); -+ -+ __ bind(L_no_such_interface); -+ // Handle IncompatibleClassChangeError in itable stubs. -+ // More detailed error message. -+ // We force resolving of the call site by jumping to the "handle -+ // wrong method" stub, and so let the interpreter runtime do all the -+ // dirty work. -+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); -+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -+ -+ masm->flush(); -+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); -+ -+ return s; -+} + -+int VtableStub::pd_code_alignment() { -+ // RISCV cache line size is not an architected constant. We just align on word size. -+ const unsigned int icache_line_size = wordSize; -+ return icache_line_size; -+} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209e2..ee298f56653 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1,6 +1,6 @@ - /* -- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. 
-+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); -+ - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4f7..82e9de5a06f 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); + - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 3799adf5dd9..6f75e623a9a 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { - strncpy(cpuinfo, "IA64", length); - #elif defined(PPC) - strncpy(cpuinfo, "PPC64", length); -+#elif defined(RISCV) -+ strncpy(cpuinfo, "RISCV64", length); - #elif defined(S390) - strncpy(cpuinfo, "S390", length); - #elif defined(SPARC) -diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp -new file mode 100644 -index 00000000000..f2610af6cdd ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + -+// nothing required here -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp ++#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 00000000000..761da5d743e +index 0000000000..6cf7683a58 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,134 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -56766,175 +53562,45 @@ index 00000000000..761da5d743e + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -+ -+#include "runtime/vm_version.hpp" -+ -+// Implementation of class atomic -+ -+// Note that memory_order_conservative requires a full barrier after atomic stores. -+// See https://patchwork.kernel.org/patch/3575821/ -+ -+template -+struct Atomic::PlatformAdd { -+ template -+ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { -+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; -+ } -+ -+ template -+ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -+ return add_and_fetch(dest, add_value, order) - add_value; -+ } -+}; -+ -+template -+template -+inline T Atomic::PlatformXchg::operator()(T volatile* dest, -+ T exchange_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -+ FULL_MEM_BARRIER; -+ return res; -+} -+ -+// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. 
-+template -+template -+inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), -+ T compare_value, -+ T exchange_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(byte_size == sizeof(T)); -+ T value = compare_value; -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ -+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false, -+ __ATOMIC_RELAXED, __ATOMIC_RELAXED); -+ -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ return value; -+} -+ -+template<> -+template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), -+ T compare_value, -+ T exchange_value, -+ atomic_memory_order order) const { -+ STATIC_ASSERT(4 == sizeof(T)); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ T rv; -+ int tmp; -+ __asm volatile( -+ "1:\n\t" -+ " addiw %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend -+ " lr.w.aq %[rv], (%[dest])\n\t" -+ " bne %[rv], %[tmp], 2f\n\t" -+ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t" -+ " bnez %[tmp], 1b\n\t" -+ "2:\n\t" -+ : [rv] "=&r" (rv), [tmp] "=&r" (tmp) -+ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value) -+ : "memory"); -+ if (order != memory_order_relaxed) { -+ FULL_MEM_BARRIER; -+ } -+ return rv; -+} -+ -+template -+struct Atomic::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct Atomic::PlatformOrderedStore -+{ -+ template -+ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } -+}; ++#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP + -+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp -new file mode 100644 -index 00000000000..28868c76406 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp -@@ -0,0 +1,45 @@ -+/* -+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
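As a side note on the cmpxchg shape shown above: the fragment below is a minimal stand-alone sketch of the same "conservative CAS" pattern, written with GCC/Clang __atomic builtins rather than HotSpot's Atomic templates, assuming a 4-byte operand. It is an approximation for illustration, not the code this patch installs.

#include <cstdint>

#define FULL_MEM_BARRIER __sync_synchronize()

// Full fence, relaxed compare-exchange, full fence -- the ordering the RISC-V
// port uses for memory_order_conservative; the C-level int32_t already gives
// the sign extension that the hand-written lr.w/sc.w loop gets from addiw.
inline int32_t cmpxchg_conservative(volatile int32_t* dest,
                                    int32_t compare_value,
                                    int32_t exchange_value) {
  int32_t expected = compare_value;
  FULL_MEM_BARRIER;                                  // leading fence
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              /* weak */ false,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  FULL_MEM_BARRIER;                                  // trailing fence
  return expected;                                   // old value, success or not
}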
-+ * -+ */ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) + -+#include + -+// Efficient swapping of data bytes from Java byte -+// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { -+ return bswap_16(x); -+} ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(OSThread::thread_id_t) \ ++ declare_unsigned_integer_type(pthread_t) + -+inline u4 Bytes::swap_u4(u4 x) { -+ return bswap_32(x); -+} ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+inline u8 Bytes::swap_u8(u8 x) { -+ return bswap_64(x); -+} ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp ++#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 00000000000..147cfdf3c10 +index 0000000000..8bcc949fed --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -@@ -0,0 +1,31 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -0,0 +1,137 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -56957,724 +53623,1031 @@ index 00000000000..147cfdf3c10 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" + -+// Empty for build system ++#include ++#include + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -new file mode 100644 -index 00000000000..1aa58f27871 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++#ifndef HWCAP_ISA_I ++#define HWCAP_ISA_I (1 << ('I' - 'A')) ++#endif + -+#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++#ifndef HWCAP_ISA_M ++#define HWCAP_ISA_M (1 << ('M' - 'A')) ++#endif + -+#include ++#ifndef HWCAP_ISA_A ++#define HWCAP_ISA_A (1 << ('A' - 'A')) ++#endif + -+// -+// Support for building on older Linux systems -+// ++#ifndef HWCAP_ISA_F ++#define HWCAP_ISA_F (1 << ('F' - 'A')) ++#endif + -+#ifndef SYS_memfd_create -+#define SYS_memfd_create 279 ++#ifndef HWCAP_ISA_D ++#define HWCAP_ISA_D (1 << ('D' - 'A')) +#endif -+#ifndef SYS_fallocate -+#define SYS_fallocate 47 ++ ++#ifndef HWCAP_ISA_C ++#define HWCAP_ISA_C (1 << ('C' - 'A')) +#endif + -+#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -new file mode 100644 -index 00000000000..297414bfcd5 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. 
-+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ ++#ifndef HWCAP_ISA_V ++#define HWCAP_ISA_V (1 << ('V' - 'A')) ++#endif + -+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++#define read_csr(csr) \ ++({ \ ++ register unsigned long __v; \ ++ __asm__ __volatile__ ("csrr %0, %1" \ ++ : "=r" (__v) \ ++ : "i" (csr) \ ++ : "memory"); \ ++ __v; \ ++}) + -+// Sets the default values for platform dependent flags used by the runtime system. -+// (see globals.hpp) ++uint32_t VM_Version::get_current_vector_length() { ++ assert(_features & CPU_V, "should not call this"); ++ return (uint32_t)read_csr(CSR_VLENB); ++} + -+define_pd_global(bool, DontYieldALot, false); -+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default -+define_pd_global(intx, VMThreadStackSize, 2048); ++VM_Version::VM_MODE VM_Version::get_satp_mode() { ++ if (!strcmp(_vm_mode, "sv39")) { ++ return VM_SV39; ++ } else if (!strcmp(_vm_mode, "sv48")) { ++ return VM_SV48; ++ } else if (!strcmp(_vm_mode, "sv57")) { ++ return VM_SV57; ++ } else if (!strcmp(_vm_mode, "sv64")) { ++ return VM_SV64; ++ } else { ++ return VM_MBARE; ++ } ++} + -+define_pd_global(intx, CompilerThreadStackSize, 2048); ++void VM_Version::get_os_cpu_info() { + -+define_pd_global(uintx, JVMInvokeMethodSlack, 8192); ++ uint64_t auxv = getauxval(AT_HWCAP); + -+// Used on 64 bit platforms for UseCompressedOops base address -+define_pd_global(uintx, HeapBaseMinAddress, 2 * G); -+ -+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -new file mode 100644 -index 00000000000..1c33dc1e87f ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,63 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -+ -+// Included in orderAccess.hpp header file. -+ -+#include "runtime/vm_version.hpp" -+ -+// Implementation of class OrderAccess. -+ -+inline void OrderAccess::loadload() { acquire(); } -+inline void OrderAccess::storestore() { release(); } -+inline void OrderAccess::loadstore() { acquire(); } -+inline void OrderAccess::storeload() { fence(); } -+ -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ -+inline void OrderAccess::acquire() { -+ READ_MEM_BARRIER; -+} -+ -+inline void OrderAccess::release() { -+ WRITE_MEM_BARRIER; -+} -+ -+inline void OrderAccess::fence() { -+ FULL_MEM_BARRIER; -+} -+ -+inline void OrderAccess::cross_modify_fence_impl() { -+ asm volatile("fence.i" : : : "memory"); -+ if (UseConservativeFence) { -+ asm volatile("fence ir, ir" : : : "memory"); -+ } -+} -+ -+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -new file mode 100644 -index 00000000000..1f46bbab0a2 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,466 @@ -+/* -+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
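A brief aside on the barrier mapping in the orderAccess hunk above: the free functions below show the same compiler-intrinsic choices in isolation, assuming GCC/Clang on riscv64. They are illustrative helpers, not part of the patch.

inline void acquire_barrier() { __atomic_thread_fence(__ATOMIC_ACQUIRE); }  // loadload / loadstore
inline void release_barrier() { __atomic_thread_fence(__ATOMIC_RELEASE); }  // storestore / loadstore
inline void full_fence()      { __sync_synchronize(); }                     // storeload needs the full fence

inline void local_instruction_fence() {
  // fence.i orders the *local* hart's instruction fetches after prior data
  // writes; publishing patched code to other harts additionally needs the
  // icache-flush syscall used elsewhere in this port.
  asm volatile("fence.i" : : : "memory");
}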
-+ * -+ */ -+ -+// no precompiled headers -+#include "asm/macroAssembler.hpp" -+#include "classfile/vmSymbols.hpp" -+#include "code/codeCache.hpp" -+#include "code/icBuffer.hpp" -+#include "code/nativeInst.hpp" -+#include "code/vtableStubs.hpp" -+#include "interpreter/interpreter.hpp" -+#include "jvm.h" -+#include "memory/allocation.inline.hpp" -+#include "os_share_linux.hpp" -+#include "prims/jniFastGetField.hpp" -+#include "prims/jvm_misc.hpp" -+#include "runtime/arguments.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/interfaceSupport.inline.hpp" -+#include "runtime/java.hpp" -+#include "runtime/javaCalls.hpp" -+#include "runtime/mutexLocker.hpp" -+#include "runtime/osThread.hpp" -+#include "runtime/safepointMechanism.hpp" -+#include "runtime/sharedRuntime.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+#include "runtime/timer.hpp" -+#include "signals_posix.hpp" -+#include "utilities/debug.hpp" -+#include "utilities/events.hpp" -+#include "utilities/vmError.hpp" -+ -+// put OS-includes here -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+# include -+ -+#define REG_LR 1 -+#define REG_FP 8 -+ -+NOINLINE address os::current_stack_pointer() { -+ return (address)__builtin_frame_address(0); -+} -+ -+char* os::non_memory_address_word() { -+ // Must never look like an address returned by reserve_memory, -+ return (char*) -1; -+} -+ -+address os::Posix::ucontext_get_pc(const ucontext_t * uc) { -+ return (address)uc->uc_mcontext.__gregs[REG_PC]; -+} -+ -+void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { -+ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; -+} -+ -+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; -+} -+ -+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; -+} -+ -+address os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ address epc; -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; -+ -+ if (uc != NULL) { -+ epc = os::Posix::ucontext_get_pc(uc); -+ if (ret_sp != NULL) { -+ *ret_sp = os::Linux::ucontext_get_sp(uc); -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = os::Linux::ucontext_get_fp(uc); -+ } -+ } else { -+ epc = NULL; -+ if (ret_sp != NULL) { -+ *ret_sp = (intptr_t *)NULL; -+ } -+ if (ret_fp != NULL) { -+ *ret_fp = (intptr_t *)NULL; -+ } -+ } -+ -+ return epc; -+} -+ -+frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -+ const ucontext_t* uc = (const ucontext_t*)ucVoid; -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ return frame(frame_sp, frame_fp, frame_pc); -+} -+ -+frame os::fetch_frame_from_context(const void* ucVoid) { -+ intptr_t* frame_sp = NULL; -+ intptr_t* frame_fp = NULL; -+ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc); -+} -+ -+// By default, gcc always saves frame pointer rfp on this stack. This -+// may get turned off by -fomit-frame-pointer. 
-+frame os::get_sender_for_C_frame(frame* fr) { -+ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); -+} -+ -+NOINLINE frame os::current_frame() { -+ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if (sender_sp != NULL) { -+ frame myframe((intptr_t*)os::current_stack_pointer(), -+ sender_sp[frame::link_offset], -+ CAST_FROM_FN_PTR(address, os::current_frame)); -+ if (os::is_first_C_frame(&myframe)) { -+ // stack is not walkable -+ return frame(); -+ } else { -+ return os::get_sender_for_C_frame(&myframe); -+ } -+ } else { -+ ShouldNotReachHere(); -+ return frame(); -+ } -+} -+ -+// Utility functions -+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -+ ucontext_t* uc, JavaThread* thread) { -+ -+ // decide if this trap can be handled by a stub -+ address stub = NULL; -+ -+ address pc = NULL; -+ -+ //%note os_trap_1 -+ if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Posix::ucontext_get_pc(uc); -+ -+ address addr = (address) info->si_addr; -+ -+ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -+ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -+ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -+ } -+ -+ // Handle ALL stack overflow variations here -+ if (sig == SIGSEGV) { -+ // check if fault address is within thread stack -+ if (thread->is_in_full_stack(addr)) { -+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -+ return true; // continue -+ } -+ } -+ } ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); + -+ if (thread->thread_state() == _thread_in_Java) { -+ // Java thread running in Java code => find exception handler if any -+ // a fault inside compiled code, the interpreter, or a stub ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V); + -+ // Handle signal from NativeJump::patch_verified_entry(). -+ if ((sig == SIGILL || sig == SIGTRAP) -+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { -+ if (TraceTraps) { -+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); -+ } -+ stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { -+ stub = SharedRuntime::get_poll_stub(pc); -+ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { -+ // BugId 4454115: A read from a MappedByteBuffer can fault -+ // here if the underlying file has been truncated. -+ // Do not crash the VM in such a case. -+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); -+ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; -+ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -+ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { -+ address next_pc = pc + NativeCall::instruction_size; -+ if (is_unsafe_arraycopy) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++ char buf[512], *p; ++ while (fgets(buf, sizeof (buf), f) != NULL) { ++ if ((p = strchr(buf, ':')) != NULL) { ++ if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) { ++ if (_vm_mode[0] != '\0') { ++ continue; + } -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } -+ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -+ // Pull a pointer to the error message out of the instruction -+ // stream. -+ const uint64_t *detail_msg_ptr -+ = (uint64_t*)(pc + NativeInstruction::instruction_size); -+ const char *detail_msg = (const char *)*detail_msg_ptr; -+ const char *msg = "stop"; -+ if (TraceTraps) { -+ tty->print_cr("trap: %s: (SIGILL)", msg); ++ char* vm_mode = os::strdup(p + 2); ++ vm_mode[strcspn(vm_mode, "\n")] = '\0'; ++ _vm_mode = vm_mode; ++ } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ char* uarch = os::strdup(p + 2); ++ uarch[strcspn(uarch, "\n")] = '\0'; ++ _uarch = uarch; ++ break; + } -+ -+ // End life with a fatal error, message and detail message and the context. -+ // Note: no need to do any post-processing here (e.g. signal chaining) -+ va_list va_dummy; -+ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -+ va_end(va_dummy); -+ -+ ShouldNotReachHere(); -+ } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ stub = -+ SharedRuntime:: -+ continuation_for_implicit_exception(thread, -+ pc, -+ SharedRuntime:: -+ IMPLICIT_DIVIDE_BY_ZERO); -+ } else if (sig == SIGSEGV && -+ MacroAssembler::uses_implicit_null_check((void*)addr)) { -+ // Determination of interpreter/vtable stub/compiled code null exception -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); -+ } -+ } else if ((thread->thread_state() == _thread_in_vm || -+ thread->thread_state() == _thread_in_native) && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { -+ address next_pc = pc + NativeCall::instruction_size; -+ if (UnsafeCopyMemory::contains_pc(pc)) { -+ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -+ } -+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); -+ } -+ -+ // jni_fast_GetField can trap at certain pc's if a GC kicks in -+ // and the heap gets shrunk before the field access. 
-+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; + } + } ++ fclose(f); + } -+ -+ if (stub != NULL) { -+ // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } -+ -+ os::Posix::ucontext_set_pc(uc, stub); -+ return true; -+ } -+ -+ return false; // Mute compiler -+} -+ -+void os::Linux::init_thread_fpu_state(void) { -+} -+ -+int os::Linux::get_fpu_control_word(void) { -+ return 0; +} +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73d..c640c546b1 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -200,6 +200,9 @@ void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { + case lir_cmove: ++#ifdef RISCV ++ assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#endif + case lir_xchg: + break; + +@@ -252,9 +255,13 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif + , _label(block->label()) + , _block(block) + , _ublock(NULL) +@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : ++#ifdef RISCV ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) +@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) ++#ifdef RISCV ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif + , _label(block->label()) + , _block(block) + , _ublock(ublock) +@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { + } + + void LIR_OpBranch::negate_cond() { +- switch (_cond) { +- case lir_cond_equal: _cond = lir_cond_notEqual; break; +- case lir_cond_notEqual: _cond = lir_cond_equal; break; +- case lir_cond_less: _cond = lir_cond_greaterEqual; break; +- case lir_cond_lessEqual: _cond = lir_cond_greater; break; +- case lir_cond_greaterEqual: _cond = lir_cond_less; break; +- case lir_cond_greater: _cond = lir_cond_lessEqual; break; ++ switch (cond()) { ++ case lir_cond_equal: set_cond(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_cond(lir_cond_equal); break; ++ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; ++ case 
lir_cond_greaterEqual: set_cond(lir_cond_less); break; ++ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; + default: ShouldNotReachHere(); + } + } +@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + ++#ifdef RISCV ++ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && ++ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && ++ opBranch->_tmp5->is_illegal(), "not used"); + -+void os::Linux::set_fpu_control_word(int fpu_control) { -+} ++ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); ++ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++#endif + -+//////////////////////////////////////////////////////////////////////////////// -+// thread stack + if (opBranch->_info != NULL) do_info(opBranch->_info); + assert(opBranch->_result->is_illegal(), "not used"); + if (opBranch->_stub != NULL) opBranch->stub()->visit(this); +@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { ++#ifdef RISCV ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; + -+// Minimum usable stack sizes required to get to user code. Space for -+// HotSpot guard pages is added later. -+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; -+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; ++ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); + -+// return default stack size for thr_type -+size_t os::Posix::default_stack_size(os::ThreadType thr_type) { -+ // default stack size (compiler thread needs larger stack) -+ size_t s = (thr_type == os::compiler_thread ? 
4 * M : 1 * M); -+ return s; ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ if (op4->_opr3->is_valid()) do_input(op4->_opr3); ++ if (op4->_opr4->is_valid()) do_input(op4->_opr4); ++ do_temp(op4->_opr2); ++ do_output(op4->_result); ++#else + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + +@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + do_input(op2->_opr2); + do_temp(op2->_opr2); + do_output(op2->_result); ++#endif + + break; + } +@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++#ifdef RISCV ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); +} ++#endif + -+///////////////////////////////////////////////////////////////////////////// -+// helper functions for fatal error handler -+ -+static const char* reg_abi_names[] = { -+ "pc", -+ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", -+ "x5(t0)", "x6(t1)", "x7(t2)", -+ "x8(s0)", "x9(s1)", -+ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", -+ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", -+ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" -+}; -+ -+void os::print_context(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif ++#ifdef RISCV ++ , _cmp_opr1(LIR_OprFact::illegalOpr) ++ , _cmp_opr2(LIR_OprFact::illegalOpr) ++#endif + { } + + +@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + ++#ifdef RISCV ++void LIR_List::set_cmp_oprs(LIR_Op* op) { ++ switch (op->code()) { ++ case lir_cmp: ++ _cmp_opr1 = op->as_Op2()->in_opr1(); ++ _cmp_opr2 = op->as_Op2()->in_opr2(); ++ break; ++ case lir_branch: // fall through ++ case lir_cond_float_branch: ++ assert(op->as_OpBranch()->cond() == lir_cond_always || ++ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), ++ "conditional branches must have legal operands"); ++ if (op->as_OpBranch()->cond() != lir_cond_always) { ++ op->as_Op2()->set_in_opr1(_cmp_opr1); ++ op->as_Op2()->set_in_opr2(_cmp_opr2); ++ } ++ break; ++ case lir_cmove: ++ op->as_Op4()->set_in_opr3(_cmp_opr1); ++ op->as_Op4()->set_in_opr4(_cmp_opr2); ++ break; ++#if INCLUDE_ZGC ++ case lir_zloadbarrier_test: ++ _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr2 = LIR_OprFact::intConst(0); ++ break; ++#endif ++ default: ++ break; + } -+ -+ const ucontext_t *uc = (const ucontext_t*)context; -+ st->print_cr("Registers:"); -+ for (int r = 0; r < 32; r++) { -+ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); -+ print_location(st, uc->uc_mcontext.__gregs[r]); -+ } -+ st->cr(); -+ -+ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); -+ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); -+ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); -+ st->cr(); -+ -+ // Note: it may be unsafe to inspect memory near pc. For example, pc may -+ // point to garbage if entry point in an nmethod is corrupted. Leave -+ // this at the end, and hope for the best. 
-+ address pc = os::Posix::ucontext_get_pc(uc); -+ print_instructions(st, pc, sizeof(char)); -+ st->cr(); +} ++#endif + + void LIR_List::append(LIR_InsertionBuffer* buffer) { + assert(this == buffer->lir_list(), "wrong lir list"); +@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); ++#ifdef RISCV ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++#endif + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if (stub() != NULL) { +@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { ++#ifdef RISCV ++ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { ++#else + if (code() == lir_cmove || code() == lir_cmp) { ++#endif + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); +@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++#ifdef RISCV ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} ++#endif + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018b..33943e369d 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,9 @@ class LIR_Op2; + class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++#ifdef RISCV ++class LIR_Op4; ++#endif + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -916,8 +919,10 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal ++#ifndef RISCV + , lir_branch + , lir_cond_float_branch ++#endif + , lir_move + , lir_convert + , lir_alloc_object +@@ -929,11 +934,17 @@ enum LIR_Code { + , lir_unwind + , end_op1 + , begin_op2 ++#ifdef RISCV ++ , lir_branch ++ , lir_cond_float_branch ++#endif + , lir_cmp + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i ++#ifndef RISCV + , lir_cmove ++#endif + , lir_add + , lir_sub + , lir_mul +@@ -964,6 +975,11 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++#ifdef RISCV ++ , begin_op4 ++ , lir_cmove ++ , end_op4 ++#endif + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1001,6 +1017,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#if defined(RISCV) && defined(INCLUDE_ZGC) ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++#ifdef RISCV ++ virtual LIR_Op4* as_Op4() { return NULL; } ++#endif + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall { + virtual void verify() const; + }; + +- +-class LIR_OpBranch: public LIR_Op { +- friend class LIR_OpVisitState; +- +- private: +- LIR_Condition _cond; +- BasicType _type; +- Label* _label; +- BlockBegin* _block; // if this is a branch to a block, this is the block +- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block +- CodeStub* _stub; // if this is a branch to a stub, this is the stub +- +- public: +- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +- , _cond(cond) +- , _type(type) +- , _label(lbl) +- , _block(NULL) +- , _ublock(NULL) +- , _stub(NULL) { } +- +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); +- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); +- +- // for unordered comparisons +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); +- +- LIR_Condition cond() const { return _cond; } +- BasicType type() const { return _type; } +- Label* label() const { return _label; } +- BlockBegin* block() const { return _block; } +- BlockBegin* ublock() const { return _ublock; } +- CodeStub* stub() const { return _stub; } +- +- void change_block(BlockBegin* b); +- void change_ublock(BlockBegin* b); +- void negate_cond(); +- +- virtual void emit_code(LIR_Assembler* masm); +- virtual LIR_OpBranch* as_OpBranch() { return this; } +- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +-}; +- +- + class ConversionStub; + + class LIR_OpConvert: public LIR_Op1 { +@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op { + void verify() const; + + public: +- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) ++ LIR_Op2(LIR_Code code, 
LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) + : LIR_Op(code, LIR_OprFact::illegalOpr, info) + , _opr1(opr1) + , _opr2(opr2) +- , _type(T_ILLEGAL) +- , _condition(condition) + , _fpu_stack_size(0) ++ , _type(type) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(type) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(T_ILLEGAL) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(tmp1) + , _tmp2(tmp2) + , _tmp3(tmp3) + , _tmp4(tmp4) +- , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(tmp5) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { ++#ifdef RISCV ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#endif + } + void set_condition(LIR_Condition condition) { ++#ifdef RISCV ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#endif + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++#ifdef RISCV ++class LIR_OpBranch: public LIR_Op2 { ++#else ++class LIR_OpBranch: public LIR_Op { ++#endif ++ friend class LIR_OpVisitState; + -+void os::print_register_info(outputStream *st, const void *context) { -+ if (context == NULL) { -+ return; -+ } -+ -+ const ucontext_t *uc = (const ucontext_t*)context; 
-+ -+ st->print_cr("Register to memory mapping:"); -+ st->cr(); -+ -+ // this is horrendously verbose but the layout of the registers in the -+ // context does not match how we defined our abstract Register set, so -+ // we can't just iterate through the gregs area ++ private: ++#ifndef RISCV ++ LIR_Condition _cond; ++ BasicType _type; ++#endif ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub + -+ // this is only for the "general purpose" registers ++ public: ++ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++#else ++ : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _cond(cond) ++ , _type(type) ++#endif ++ , _label(lbl) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } + -+ for (int r = 0; r < 32; r++) -+ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); -+ st->cr(); -+} ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); ++ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); + -+void os::setup_fpu() { -+} ++ // for unordered comparisons ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); + -+#ifndef PRODUCT -+void os::verify_stack_alignment() { -+ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); -+} ++#ifdef RISCV ++ LIR_Condition cond() const { return condition(); } ++ void set_cond(LIR_Condition cond) { set_condition(cond); } ++#else ++ LIR_Condition cond() const { return _cond; } ++ void set_cond(LIR_Condition cond) { _cond = cond; } +#endif ++ BasicType type() const { return _type; } ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } + -+int os::extra_bang_size_in_bytes() { -+ return 0; -+} ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); + -+extern "C" { -+ int SpinPause() { -+ return 0; -+ } ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpBranch* as_OpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; + -+ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ if (from > to) { -+ const jshort *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jshort *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ if (from > to) { -+ const jint *end = from + count; -+ while (from < end) { -+ *(to++) = *(from++); -+ } -+ } else if (from < to) { -+ const jint *end = from; -+ from += count - 1; -+ to += count - 1; -+ while (from >= end) { -+ *(to--) = *(from--); -+ } -+ } -+ } -+ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ if (from > to) { -+ const jlong *end = from + count; -+ while (from < end) { -+ os::atomic_copy64(from++, to++); -+ } -+ } else if (from < to) { -+ const jlong *end = from; -+ from += count - 1; -+ to += count - 1; -+ 
while (from >= end) { -+ os::atomic_copy64(from--, to--); -+ } -+ } -+ } + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++#ifdef RISCV ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ protected: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Opr _tmp1; ++ LIR_Opr _tmp2; ++ LIR_Opr _tmp3; ++ LIR_Opr _tmp4; ++ LIR_Opr _tmp5; ++ LIR_Condition _condition; + -+ void _Copy_arrayof_conjoint_bytes(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count); -+ } -+ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 2); -+ } -+ void _Copy_arrayof_conjoint_jints(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 4); -+ } -+ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, -+ HeapWord* to, -+ size_t count) { -+ memmove(to, from, count * 8); ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, ++ LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _tmp1(LIR_OprFact::illegalOpr) ++ , _tmp2(LIR_OprFact::illegalOpr) ++ , _tmp3(LIR_OprFact::illegalOpr) ++ , _tmp4(LIR_OprFact::illegalOpr) ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmove, "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); + } -+}; -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -new file mode 100644 -index 00000000000..6d415630661 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Opr tmp1_opr() const { return _tmp1; } ++ LIR_Opr tmp2_opr() const { return _tmp2; } ++ LIR_Opr tmp3_opr() const { return _tmp3; } ++ LIR_Opr tmp4_opr() const { return _tmp4; } ++ LIR_Opr tmp5_opr() const { return _tmp5; } + -+ static void setup_fpu(); ++ LIR_Condition condition() const { return _condition; } ++ void set_condition(LIR_Condition condition) { _condition = condition; } + -+ // Used to register dynamic code cache area with the OS -+ // Note: Currently only used in 64 bit Windows implementations -+ static bool register_code_area(char *low, char *high) { return true; } ++ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } ++ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } ++ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } ++ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } + -+ // Atomically copy 64 bits of data -+ static void atomic_copy64(const volatile void *src, volatile void *dst) { -+ *(jlong *) dst = *(const jlong *) src; -+ } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++#endif + + //-------------------------------- + class LabelObj: public CompilationResourceObj { +@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif ++#ifdef RISCV ++ LIR_Opr _cmp_opr1; ++ LIR_Opr _cmp_opr2; ++#endif + + public: + void append(LIR_Op* op) { +@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + ++#ifdef RISCV ++ set_cmp_oprs(op); ++ // lir_cmp set cmp oprs only on riscv ++ if (op->code() == lir_cmp) return; ++#endif + -+ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction -+ // only work on the current hart, so kernel provides the icache flush syscall to flush icache -+ // on each hart. You can pass a flag to determine a global or local icache flush. -+ static void icache_flush(long int start, long int end) -+ { -+ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; -+ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; -+ register long int __a0 asm ("a0") = start; -+ register long int __a1 asm ("a1") = end; -+ // the flush can be applied to either all threads or only the current. -+ // 0 means a global icache flush, and the icache flush will be applied -+ // to other harts concurrently executing. 
-+ register long int __a2 asm ("a2") = 0; -+ __asm__ volatile ("ecall\n\t" -+ : "+r" (__a0) -+ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) -+ : "memory"); -+ } + _operations.append(op); + + #ifdef ASSERT +@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, int line); + #endif + ++#ifdef RISCV ++ void set_cmp_oprs(LIR_Op* op); ++#endif + -+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp + //---------- accessors --------------- + LIR_OpList* instructions_list() { return &_operations; } + int length() const { return _operations.length(); } +@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + ++#ifdef RISCV ++ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { ++ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); ++ } ++#else + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); + } ++#endif + + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f7..68aec26c1e 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + ++#ifndef RISCV + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; ++#endif + + case lir_shl: + case lir_shr: +@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } + } + ++#ifdef RISCV ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch(op->code()) { ++ case lir_cmove: ++ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} ++#endif + + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe54..baeb4aa442 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++#ifdef RISCV ++ void emit_op4(LIR_Op4* op); ++#endif + void emit_opBranch(LIR_OpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); +@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void 
comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); ++#ifdef RISCV ++ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); ++#else + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++#endif + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index acc969ac9c..512b63c744 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { ++#ifdef RISCV ++ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; ++#else + assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); + LIR_Op2* cmove = (LIR_Op2*)op; ++#endif + + LIR_Opr move_from = cmove->in_opr1(); + LIR_Opr move_to = cmove->result_opr(); +@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() { + } + } + ++#ifndef RISCV ++ // Disable these optimizations on riscv temporarily, because it does not ++ // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); + + EdgeMoveOptimizer::optimize(ir()->code()); +@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() { + // check that cfg is still correct after optimizations + ir()->verify(); + } ++#endif + + NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); + NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); +@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. ++#ifdef RISCV ++ LIR_Op4* prev_cmove = NULL; ++#else + LIR_Op2* prev_cmove = NULL; ++#endif + + for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { ++#ifdef RISCV ++ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); ++ prev_cmove = (LIR_Op4*)prev_op; ++#else + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; ++#endif + assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); + } + if (prev_op->code() == lir_cmp) { +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 4771a8b865..6d377fa005 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -31,7 +31,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce74243..125cc169be 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,11 @@ private: + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : ++#ifdef RISCV ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), ++#else + LIR_Op(), ++#endif + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57..ff16de0e77 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) + return false; + #else + #warning "Unconfigured platform" +diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp +index a383297611..5e9228e705 100644 +--- a/src/hotspot/share/runtime/abstract_vm_version.cpp ++++ b/src/hotspot/share/runtime/abstract_vm_version.cpp +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { + IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ + S390_ONLY("s390") \ +- SPARC_ONLY("sparc") ++ SPARC_ONLY("sparc") \ ++ RISCV64_ONLY("riscv64") + #endif // !ZERO + #endif // !CPU + +diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp +index 8ac6d63586..6bb38c40cc 100644 +--- a/src/hotspot/share/runtime/thread.hpp ++++ b/src/hotspot/share/runtime/thread.hpp +@@ -1261,7 +1261,7 @@ class JavaThread: public Thread { + address last_Java_pc(void) { return _anchor.last_Java_pc(); } + + // Safepoint support +-#if !(defined(PPC64) || defined(AARCH64)) ++#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) + JavaThreadState thread_state() const { return _thread_state; } + void set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index dee8534f73..9af07aeb45 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + set_has_async_exception(); + } + +-#if defined(PPC64) || defined (AARCH64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) + inline JavaThreadState JavaThread::thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 6605ab367c..7f1bcff6b3 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -601,6 +601,32 @@ + + #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) + ++#if defined(RISCV32) || defined(RISCV64) ++#define RISCV ++#define RISCV_ONLY(code) code ++#define NOT_RISCV(code) ++#else ++#undef RISCV ++#define RISCV_ONLY(code) ++#define NOT_RISCV(code) code ++#endif ++ ++#ifdef RISCV32 ++#define RISCV32_ONLY(code) code ++#define NOT_RISCV32(code) ++#else ++#define RISCV32_ONLY(code) ++#define NOT_RISCV32(code) code ++#endif ++ ++#ifdef RISCV64 ++#define RISCV64_ONLY(code) code ++#define NOT_RISCV64(code) ++#else ++#define RISCV64_ONLY(code) ++#define NOT_RISCV64(code) code ++#endif ++ + #ifdef VM_LITTLE_ENDIAN + #define LITTLE_ENDIAN_ONLY(code) code + #define BIG_ENDIAN_ONLY(code) +diff --git a/src/hotspot/share/utilities/vmassert_reinstall.hpp b/src/hotspot/share/utilities/vmassert_reinstall.hpp new file mode 100644 -index 00000000000..a6432c84ec7 +index 0000000000..32d31ac0c4 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/share/utilities/vmassert_reinstall.hpp +@@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -57697,28 +54670,26 @@ index 00000000000..a6432c84ec7 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -+ -+#include "runtime/prefetch.hpp" ++// Intentionally no #include guard. May be included multiple times for effect. + ++// See vmassert_uninstall.hpp for usage. + -+inline void Prefetch::read (const void *loc, intx interval) { -+} ++// Remove possible stdlib assert macro (or any others, for that matter). ++#undef assert + -+inline void Prefetch::write(void *loc, intx interval) { -+} ++// Reinstall HotSpot's assert macro, if previously defined. ++#ifdef vmassert ++#define assert(p, ...) 
vmassert(p, __VA_ARGS__) ++#endif + -+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +diff --git a/src/hotspot/share/utilities/vmassert_uninstall.hpp b/src/hotspot/share/utilities/vmassert_uninstall.hpp new file mode 100644 -index 00000000000..3100572e9fd +index 0000000000..dd6d51633d --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,92 @@ ++++ b/src/hotspot/share/utilities/vmassert_uninstall.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -57741,82 +54712,163 @@ index 00000000000..3100572e9fd + * + */ + -+#include "precompiled.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/thread.inline.hpp" -+ -+frame JavaThread::pd_last_frame() { -+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -+} -+ -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is -+// currently interrupted by SIGPROF -+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, -+ void* ucontext, bool isInJava) { -+ -+ assert(Thread::current() == this, "caller must be current thread"); -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} ++// Intentionally no #include guard. May be included multiple times for effect. + -+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { -+ return pd_get_top_frame(fr_addr, ucontext, isInJava); -+} ++// The files vmassert_uninstall.hpp and vmassert_reinstall.hpp provide a ++// workaround for the name collision between HotSpot's assert macro and the ++// Standard Library's assert macro. When including a 3rd-party header that ++// uses (and so includes) the standard assert macro, wrap that inclusion with ++// includes of these two files, e.g. ++// ++// #include "utilities/vmassert_uninstall.hpp" ++// #include
++// #include "utilities/vmassert_reinstall.hpp" ++// ++// This removes the HotSpot macro definition while pre-processing the ++// 3rd-party header, then reinstates the HotSpot macro (if previously defined) ++// for following code. + -+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ // If we have a last_Java_frame, then we should use it even if -+ // isInJava == true. It should be more reliable than ucontext info. -+ if (has_last_Java_frame() && frame_anchor()->walkable()) { -+ *fr_addr = pd_last_frame(); -+ return true; -+ } ++// Remove HotSpot's assert macro, if present. ++#ifdef vmassert ++#undef assert ++#endif // vmassert + -+ // At this point, we don't have a last_Java_frame, so -+ // we try to glean some information out of the ucontext -+ // if we were running Java code when SIGPROF came in. -+ if (isInJava) { -+ ucontext_t* uc = (ucontext_t*) ucontext; +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c5..45a927fb5e 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -58,6 +58,10 @@ + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef riscv64 ++#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" ++#endif + -+ intptr_t* ret_fp = NULL; -+ intptr_t* ret_sp = NULL; -+ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -+ if (addr == NULL || ret_sp == NULL ) { -+ // ucontext wasn't useful -+ return false; -+ } + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif ++#ifdef riscv64 ++#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG ++#endif + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg + -+ frame ret_frame(ret_sp, ret_fp, addr); -+ if (!ret_frame.safe_for_sender(this)) { -+#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr); -+ if (!ret_frame2.safe_for_sender(this)) { -+ // nothing else to try if the frame isn't good -+ return false; -+ } -+ ret_frame = ret_frame2; -+#else -+ // nothing else to try if the frame isn't good -+ return false; -+#endif /* COMPILER2 */ -+ } -+ *fr_addr = ret_frame; -+ return true; -+ } ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; + -+ // nothing else to try -+ return false; -+} ++#endif /* riscv64 */ + -+void JavaThread::cache_global_variables() { } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e021..ab092d4ee3 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -43,6 +43,8 @@ + #elif defined(arm) + #include + #define user_regs_struct pt_regs ++#elif defined(riscv64) ++#include + #endif + + // This C bool type must be int for compatibility with Linux calls and +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c7..9bff9ee9b1 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; + import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; + import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; ++import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +@@ -598,6 +599,8 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } ++ } else if (cpu.equals("riscv64")) { ++ machDesc = new MachineDescriptionRISCV64(); + } else { + try { + machDesc = (MachineDescription) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 00000000000..61e2cf85b63 +index 0000000000..a972516dee --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,48 @@ ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +@@ -0,0 +1,40 @@ +/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -57839,43 +54891,78 @@ index 00000000000..61e2cf85b63 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++package sun.jvm.hotspot.debugger; + -+ private: -+ void pd_initialize() { -+ _anchor.clear(); ++public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; + } + -+ frame pd_last_frame(); -+ -+ public: -+ static ByteSize last_Java_fp_offset() { -+ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ public boolean isLP64() { ++ return true; + } + -+ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, -+ bool isInJava); -+ -+ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); -+private: -+ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+ -+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp -new file mode 100644 -index 00000000000..6cf7683a586 ---- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. ++ public boolean isBigEndian() { ++ return false; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb714..dc0bcb3da9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.riscv64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); +- } else { ++ } else if (cpu.equals("riscv64")) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxRISCV64CFrame(dbg, fp, pc); ++ } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); + return context.getTopFrame(dbg); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +new file mode 100644 +index 0000000000..f06da24bd0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or @@ -57893,44 +54980,79 @@ index 00000000000..6cf7683a586 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++package sun.jvm.hotspot.debugger.linux.riscv64; + -+// These are the OS and CPU-specific fields, types and integer -+// constants required by the Serviceability Agent. This file is -+// referenced by vmStructs.cpp. 
++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; + -+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ \ -+ /******************************/ \ -+ /* Threads (NOTE: incomplete) */ \ -+ /******************************/ \ -+ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ -+ nonstatic_field(OSThread, _pthread_id, pthread_t) ++public final class LinuxRISCV64CFrame extends BasicCFrame { ++ private static final int C_FRAME_LINK_OFFSET = -2; ++ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; + ++ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } + -+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ -+ \ -+ /**********************/ \ -+ /* Posix Thread IDs */ \ -+ /**********************/ \ -+ \ -+ declare_integer_type(OSThread::thread_id_t) \ -+ declare_unsigned_integer_type(pthread_t) ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } + -+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ public Address pc() { ++ return pc; ++ } + -+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ public Address localVariableBase() { ++ return fp; ++ } + -+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++ public CFrame sender(ThreadProxy thread) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(rsp)) { ++ return null; ++ } ++ ++ // Check alignment of fp ++ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); ++ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ // package/class internals only ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address sp; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 00000000000..4623dbfad42 +index 0000000000..fdb841ccf3 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,118 @@ ++++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ +/* -+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. + * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -57954,17532 +55076,238 @@ index 00000000000..4623dbfad42 + * + */ + -+#include "precompiled.hpp" -+#include "asm/register.hpp" -+#include "runtime/os.hpp" -+#include "runtime/os.inline.hpp" -+#include "runtime/vm_version.hpp" ++package sun.jvm.hotspot.debugger.linux.riscv64; + -+#include -+#include ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; + -+#ifndef HWCAP_ISA_I -+#define HWCAP_ISA_I (1 << ('I' - 'A')) -+#endif ++public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { ++ private LinuxDebugger debugger; + -+#ifndef HWCAP_ISA_M -+#define HWCAP_ISA_M (1 << ('M' - 'A')) -+#endif ++ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } + -+#ifndef HWCAP_ISA_A -+#define HWCAP_ISA_A (1 << ('A' - 'A')) -+#endif ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } + -+#ifndef HWCAP_ISA_F -+#define HWCAP_ISA_F (1 << ('F' - 'A')) -+#endif ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +new file mode 100644 +index 0000000000..96d5dee47c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+#ifndef HWCAP_ISA_D -+#define HWCAP_ISA_D (1 << ('D' - 'A')) -+#endif ++package sun.jvm.hotspot.debugger.proc.riscv64; + -+#ifndef HWCAP_ISA_C -+#define HWCAP_ISA_C (1 << ('C' - 'A')) -+#endif ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; + -+#ifndef HWCAP_ISA_V -+#define HWCAP_ISA_V (1 << ('V' - 'A')) -+#endif ++public class ProcRISCV64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; + -+#ifndef HWCAP_ISA_B -+#define HWCAP_ISA_B (1 << ('B' - 'A')) -+#endif ++ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; + -+#define read_csr(csr) \ -+({ \ -+ register unsigned long __v; \ -+ __asm__ __volatile__ ("csrr %0, %1" \ -+ : "=r" (__v) \ -+ : "i" (csr) \ -+ : "memory"); \ -+ __v; \ -+}) ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } + -+uint32_t VM_Version::get_current_vector_length() { -+ assert(_features & CPU_V, "should not call this"); -+ return (uint32_t)read_csr(CSR_VLENB); -+} ++ public ProcRISCV64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } + -+void VM_Version::get_os_cpu_info() { ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } + -+ uint64_t auxv = getauxval(AT_HWCAP); ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } + -+ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -+ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -+ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -+ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -+ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -+ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -+ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -+ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); -+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V | -+ HWCAP_ISA_B); ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } + -+ if (FILE *f = fopen("/proc/cpuinfo", "r")) { -+ char buf[512], *p; -+ while (fgets(buf, sizeof (buf), f) != NULL) { -+ if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { -+ char* uarch = os::strdup(p + 2); -+ uarch[strcspn(uarch, "\n")] = '\0'; -+ _uarch = uarch; -+ break; ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { ++ return false; + } -+ } ++ ++ return (((ProcRISCV64Thread) obj).id == id); ++ } ++ ++ public int 
hashCode() { ++ return id; + } -+ fclose(f); -+ } +} -diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73d1..733ee9e654c 100644 ---- a/src/hotspot/share/c1/c1_LIR.cpp -+++ b/src/hotspot/share/c1/c1_LIR.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { - #ifdef ASSERT - switch (code()) { -- case lir_cmove: - case lir_xchg: - break; - -@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { - - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) - , _label(block->label()) - , _block(block) - , _ublock(NULL) -@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) - , _label(stub->entry()) - , _block(NULL) - , _ublock(NULL) -@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : - } - - LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) - , _label(block->label()) - , _block(block) - , _ublock(ublock) -@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { - } - - void LIR_OpBranch::negate_cond() { -- switch (_cond) { -- case lir_cond_equal: _cond = lir_cond_notEqual; break; -- case lir_cond_notEqual: _cond = lir_cond_equal; break; -- case lir_cond_less: _cond = lir_cond_greaterEqual; break; -- case lir_cond_lessEqual: _cond = lir_cond_greater; break; -- case lir_cond_greaterEqual: _cond = lir_cond_less; break; -- case lir_cond_greater: _cond = lir_cond_lessEqual; break; -+ switch (cond()) { -+ case lir_cond_equal: set_cond(lir_cond_notEqual); break; -+ case lir_cond_notEqual: set_cond(lir_cond_equal); break; -+ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; -+ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; -+ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; -+ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; - default: ShouldNotReachHere(); - } - } -@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - assert(op->as_OpBranch() != NULL, "must be"); - LIR_OpBranch* opBranch = (LIR_OpBranch*)op; - -+ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && -+ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && -+ opBranch->_tmp5->is_illegal(), "not used"); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +new file mode 100644 +index 0000000000..f2aa845e66 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); -+ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++package sun.jvm.hotspot.debugger.proc.riscv64; + - if (opBranch->_info != NULL) do_info(opBranch->_info); - assert(opBranch->_result->is_illegal(), "not used"); - if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { - // to the result operand, otherwise the backend fails - case lir_cmove: - { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "must be"); -+ LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); -+ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); -+ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); -+ do_input(op4->_opr1); -+ do_input(op4->_opr2); -+ if (op4->_opr3->is_valid()) do_input(op4->_opr3); -+ if (op4->_opr4->is_valid()) do_input(op4->_opr4); -+ do_temp(op4->_opr2); -+ do_output(op4->_result); - - break; - } -@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { - masm->emit_op3(this); - } - -+void LIR_Op4::emit_code(LIR_Assembler* masm) { -+ masm->emit_op4(this); -+} ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; + - void LIR_OpLock::emit_code(LIR_Assembler* masm) { - masm->emit_lock(this); - if (stub()) { -@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) - , 
_file(NULL) - , _line(0) - #endif -+#ifdef RISCV -+ , _cmp_opr1(LIR_OprFact::illegalOpr) -+ , _cmp_opr2(LIR_OprFact::illegalOpr) -+#endif - { } - - -@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { - } - #endif - -+#ifdef RISCV -+void LIR_List::set_cmp_oprs(LIR_Op* op) { -+ switch (op->code()) { -+ case lir_cmp: -+ _cmp_opr1 = op->as_Op2()->in_opr1(); -+ _cmp_opr2 = op->as_Op2()->in_opr2(); -+ break; -+ case lir_branch: // fall through -+ case lir_cond_float_branch: -+ assert(op->as_OpBranch()->cond() == lir_cond_always || -+ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), -+ "conditional branches must have legal operands"); -+ if (op->as_OpBranch()->cond() != lir_cond_always) { -+ op->as_Op2()->set_in_opr1(_cmp_opr1); -+ op->as_Op2()->set_in_opr2(_cmp_opr2); -+ } -+ break; -+ case lir_cmove: -+ op->as_Op4()->set_in_opr3(_cmp_opr1); -+ op->as_Op4()->set_in_opr4(_cmp_opr2); -+ break; -+#if INCLUDE_ZGC -+ case lir_zloadbarrier_test: -+ _cmp_opr1 = FrameMap::as_opr(t1); -+ _cmp_opr2 = LIR_OprFact::intConst(0); -+ break; -+#endif -+ default: -+ break; -+ } ++public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } +} -+#endif - - void LIR_List::append(LIR_InsertionBuffer* buffer) { - assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { - // LIR_OpBranch - void LIR_OpBranch::print_instr(outputStream* out) const { - print_condition(out, cond()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); - if (block() != NULL) { - out->print("[B%d] ", block()->block_id()); - } else if (stub() != NULL) { -@@ -1927,7 +1972,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { - - // LIR_Op2 - void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { -+ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { - print_condition(out, condition()); out->print(" "); - } - in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { - result_opr()->print(out); - } - -+// LIR_Op4 -+void LIR_Op4::print_instr(outputStream* out) const { -+ print_condition(out, condition()); out->print(" "); -+ in_opr1()->print(out); out->print(" "); -+ in_opr2()->print(out); out->print(" "); -+ in_opr3()->print(out); out->print(" "); -+ 
in_opr4()->print(out); out->print(" "); -+ result_opr()->print(out); -+} - - void LIR_OpLock::print_instr(outputStream* out) const { - hdr_opr()->print(out); out->print(" "); -diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018b7..efff6bf7a30 100644 ---- a/src/hotspot/share/c1/c1_LIR.hpp -+++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -867,6 +867,7 @@ class LIR_Op2; - class LIR_OpDelay; - class LIR_Op3; - class LIR_OpAllocArray; -+class LIR_Op4; - class LIR_OpCall; - class LIR_OpJavaCall; - class LIR_OpRTCall; -@@ -916,8 +917,6 @@ enum LIR_Code { - , lir_null_check - , lir_return - , lir_leal -- , lir_branch -- , lir_cond_float_branch - , lir_move - , lir_convert - , lir_alloc_object -@@ -929,11 +928,12 @@ enum LIR_Code { - , lir_unwind - , end_op1 - , begin_op2 -+ , lir_branch -+ , lir_cond_float_branch - , lir_cmp - , lir_cmp_l2i - , lir_ucmp_fd2i - , lir_cmp_fd2i -- , lir_cmove - , lir_add - , lir_sub - , lir_mul -@@ -964,6 +964,9 @@ enum LIR_Code { - , lir_fmad - , lir_fmaf - , end_op3 -+ , begin_op4 -+ , lir_cmove -+ , end_op4 - , begin_opJavaCall - , lir_static_call - , lir_optvirtual_call -@@ -1001,6 +1004,11 @@ enum LIR_Code { - , begin_opAssert - , lir_assert - , end_opAssert -+#ifdef INCLUDE_ZGC -+ , begin_opZLoadBarrierTest -+ , lir_zloadbarrier_test -+ , end_opZLoadBarrierTest -+#endif - }; - - -@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { - virtual LIR_Op1* as_Op1() { return NULL; } - virtual LIR_Op2* as_Op2() { return NULL; } - virtual LIR_Op3* as_Op3() { return NULL; } -+ virtual LIR_Op4* as_Op4() { return NULL; } - virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } - virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } - virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { - virtual void verify() const; - }; - -- --class LIR_OpBranch: public LIR_Op { -- friend class LIR_OpVisitState; -- -- private: -- LIR_Condition _cond; -- BasicType _type; -- Label* _label; -- BlockBegin* _block; // if this is a branch to a block, this is the block -- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -- CodeStub* _stub; // if this is a branch to a stub, this is the stub -- -- public: -- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -- , _cond(cond) -- , _type(type) -- , _label(lbl) -- , _block(NULL) -- , _ublock(NULL) -- , _stub(NULL) { } -- -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); -- -- // for unordered comparisons -- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); -- -- LIR_Condition cond() const { return _cond; } -- BasicType type() const { return _type; } -- Label* label() const { return _label; } -- BlockBegin* block() const { return _block; } -- BlockBegin* ublock() const { return _ublock; } -- CodeStub* stub() const { return _stub; } -- -- void change_block(BlockBegin* b); -- void change_ublock(BlockBegin* b); -- void negate_cond(); -- -- virtual void 
emit_code(LIR_Assembler* masm); -- virtual LIR_OpBranch* as_OpBranch() { return this; } -- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; --}; -- -- - class ConversionStub; - - class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { - void verify() const; - - public: -- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) -+ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) - : LIR_Op(code, LIR_OprFact::illegalOpr, info) - , _opr1(opr1) - , _opr2(opr2) -- , _type(T_ILLEGAL) -- , _condition(condition) - , _fpu_stack_size(0) -+ , _type(type) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code == lir_cmp || code == lir_assert, "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) - , _tmp3(LIR_OprFact::illegalOpr) - , _tmp4(LIR_OprFact::illegalOpr) -- , _tmp5(LIR_OprFact::illegalOpr) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(T_ILLEGAL) -- , _condition(lir_cond_unknown) - , _fpu_stack_size(0) - , _tmp1(tmp1) - , _tmp2(tmp2) - , _tmp3(tmp3) - , _tmp4(tmp4) -- , _tmp5(tmp5) { -- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); -+ , _tmp5(tmp5) -+ , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); - } - - LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { - LIR_Opr tmp4_opr() const { return _tmp4; } - LIR_Opr tmp5_opr() const { return _tmp5; } - LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; - } - void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; -+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; - } - - void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1667,51 @@ class LIR_Op2: public LIR_Op { - virtual void 
print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_OpBranch: public LIR_Op2 { -+ friend class LIR_OpVisitState; -+ -+ private: -+ Label* _label; -+ BlockBegin* _block; // if this is a branch to a block, this is the block -+ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block -+ CodeStub* _stub; // if this is a branch to a stub, this is the stub -+ -+ public: -+ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) -+ , _label(lbl) -+ , _block(NULL) -+ , _ublock(NULL) -+ , _stub(NULL) { } -+ -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); -+ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); -+ -+ // for unordered comparisons -+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); -+ -+ LIR_Condition cond() const { -+ return condition(); -+ } -+ -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); -+ } -+ -+ Label* label() const { return _label; } -+ BlockBegin* block() const { return _block; } -+ BlockBegin* ublock() const { return _ublock; } -+ CodeStub* stub() const { return _stub; } -+ -+ void change_block(BlockBegin* b); -+ void change_ublock(BlockBegin* b); -+ void negate_cond(); -+ -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_OpBranch* as_OpBranch() { return this; } -+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; -+}; -+ - class LIR_OpAllocArray : public LIR_Op { - friend class LIR_OpVisitState; - -@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { - virtual void print_instr(outputStream* out) const PRODUCT_RETURN; - }; - -+class LIR_Op4: public LIR_Op { -+ friend class LIR_OpVisitState; -+ protected: -+ LIR_Opr _opr1; -+ LIR_Opr _opr2; -+ LIR_Opr _opr3; -+ LIR_Opr _opr4; -+ BasicType _type; -+ LIR_Opr _tmp1; -+ LIR_Opr _tmp2; -+ LIR_Opr _tmp3; -+ LIR_Opr _tmp4; -+ LIR_Opr _tmp5; -+ LIR_Condition _condition; -+ -+ public: -+ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, -+ LIR_Opr result, BasicType type) -+ : LIR_Op(code, result, NULL) -+ , _opr1(opr1) -+ , _opr2(opr2) -+ , _opr3(opr3) -+ , _opr4(opr4) -+ , _type(type) -+ , _tmp1(LIR_OprFact::illegalOpr) -+ , _tmp2(LIR_OprFact::illegalOpr) -+ , _tmp3(LIR_OprFact::illegalOpr) -+ , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) -+ , _condition(condition) { -+ assert(code == lir_cmove, "code check"); -+ assert(type != T_ILLEGAL, "cmove should have type"); -+ } -+ -+ LIR_Opr in_opr1() const { return _opr1; } -+ LIR_Opr in_opr2() const { return _opr2; } -+ LIR_Opr in_opr3() const { return _opr3; } -+ LIR_Opr in_opr4() const { return _opr4; } -+ BasicType type() const { return _type; } -+ LIR_Opr tmp1_opr() const { return _tmp1; } -+ LIR_Opr tmp2_opr() const { return _tmp2; } -+ LIR_Opr tmp3_opr() const { return _tmp3; } -+ LIR_Opr tmp4_opr() const { return _tmp4; } -+ LIR_Opr tmp5_opr() const { return _tmp5; } -+ -+ LIR_Condition condition() const { return _condition; } -+ void set_condition(LIR_Condition condition) { _condition = condition; } -+ -+ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } -+ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } -+ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } -+ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } -+ virtual void emit_code(LIR_Assembler* masm); -+ virtual LIR_Op4* as_Op4() { return this; } -+ -+ virtual void 
print_instr(outputStream* out) const PRODUCT_RETURN; -+}; - - //-------------------------------- - class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { - const char * _file; - int _line; - #endif -+#ifdef RISCV -+ LIR_Opr _cmp_opr1; -+ LIR_Opr _cmp_opr2; -+#endif - - public: - void append(LIR_Op* op) { -@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { - } - #endif // PRODUCT - -+#ifdef RISCV -+ set_cmp_oprs(op); -+ // lir_cmp set cmp oprs only on riscv -+ if (op->code() == lir_cmp) return; -+#endif -+ - _operations.append(op); - - #ifdef ASSERT -@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { - void set_file_and_line(const char * file, int line); - #endif - -+#ifdef RISCV -+ void set_cmp_oprs(LIR_Op* op); -+#endif -+ - //---------- accessors --------------- - LIR_OpList* instructions_list() { return &_operations; } - int length() const { return _operations.length(); } -@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); - -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); -+ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { -+ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); - } - - void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f74..42a0350f7d9 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.cpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); - break; - -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- - case lir_shl: - case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { - } - } - -+void LIR_Assembler::emit_op4(LIR_Op4* op) { -+ switch(op->code()) { -+ case lir_cmove: -+ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); -+ break; -+ -+ default: -+ Unimplemented(); -+ break; -+ } -+} - - void LIR_Assembler::build_frame() { - _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); -diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe542..c677bd346fc 100644 ---- a/src/hotspot/share/c1/c1_LIRAssembler.hpp -+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { - void emit_op1(LIR_Op1* op); - void emit_op2(LIR_Op2* op); - void emit_op3(LIR_Op3* op); -+ void emit_op4(LIR_Op4* op); - void emit_opBranch(LIR_OpBranch* op); - void emit_opLabel(LIR_OpLabel* op); - void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { - void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); - void comp_mem_op(LIR_Opr src, LIR_Opr 
result, BasicType type, CodeEmitInfo* info); // info set for null exceptions - void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); -- -+ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); - void ic_call( LIR_OpJavaCall* op); - void vtable_call( LIR_OpJavaCall* op); -diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd996..a4dfe8552ae 100644 ---- a/src/hotspot/share/c1/c1_LinearScan.cpp -+++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { - break; - } - case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; -+ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); -+ LIR_Op4* cmove = (LIR_Op4*)op; - - LIR_Opr move_from = cmove->in_opr1(); -- LIR_Opr move_to = cmove->result_opr(); -+ LIR_Opr move_to = cmove->result_opr(); - - if (move_to->is_register() && move_from->is_register()) { - Interval* from = interval_at(reg_num(move_from)); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { - } - } - -+#ifndef RISCV -+ // Disable these optimizations on riscv temporarily, because it does not -+ // work when the comparison operands are bound to branches or cmoves. - { TIME_LINEAR_SCAN(timer_optimize_lir); - - EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { - // check that cfg is still correct after optimizations - ir()->verify(); - } -+#endif - - NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); - NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { - // There might be a cmove inserted for profiling which depends on the same - // compare. If we change the condition of the respective compare, we have - // to take care of this cmove as well. -- LIR_Op2* prev_cmove = NULL; -+ LIR_Op4* prev_cmove = NULL; - - for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { - prev_op = instructions->at(j); - // check for the cmove - if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; -+ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); -+ prev_cmove = (LIR_Op4*)prev_op; - assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); - } - if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b8652..6d377fa005d 100644 ---- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. 
-+ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -31,7 +31,7 @@ - #include "utilities/defaultStream.hpp" - - void ShenandoahArguments::initialize() { --#if !(defined AARCH64 || defined AMD64 || defined IA32) -+#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) - vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); - #endif - -diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp -index 9f8ce742433..f36dd612eff 100644 ---- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp -+++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { - - public: - LIR_OpZLoadBarrierTest(LIR_Opr opr) : -- LIR_Op(), -+ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), - _opr(opr) {} - - virtual void visit(LIR_OpVisitState* state) { -diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a57e..ff16de0e778 100644 ---- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -+++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { - inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { - #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) - return true; --#elif defined(SPARC) || defined(ARM) || defined(AARCH64) -+#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) - return false; - #else - #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d0879592..bc856d4b617 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2bdb..b5e64b65ff7 100644 ---- a/src/hotspot/share/runtime/abstract_vm_version.cpp -+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { - IA32_ONLY("x86") \ - IA64_ONLY("ia64") \ - S390_ONLY("s390") \ -- SPARC_ONLY("sparc") -+ SPARC_ONLY("sparc") \ -+ RISCV64_ONLY("riscv64") - #endif // !ZERO - #endif // !CPU - -diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp -index e7b32723e47..434826853ee 100644 ---- a/src/hotspot/share/runtime/synchronizer.cpp -+++ b/src/hotspot/share/runtime/synchronizer.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. 
All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index aa914eccafc..a2f98e6a251 100644 ---- a/src/hotspot/share/runtime/thread.hpp -+++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { - address last_Java_pc(void) { return _anchor.last_Java_pc(); } - - // Safepoint support --#if !(defined(PPC64) || defined(AARCH64)) -+#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) - JavaThreadState thread_state() const { return _thread_state; } - void set_thread_state(JavaThreadState s) { - assert(current_or_null() == NULL || current_or_null() == this, -diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f739..9af07aeb459 100644 ---- a/src/hotspot/share/runtime/thread.inline.hpp -+++ b/src/hotspot/share/runtime/thread.inline.hpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * -@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { - set_has_async_exception(); - } - --#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) - inline JavaThreadState JavaThread::thread_state() const { - return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); - } -diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf802538689..e8ab3097ac7 100644 ---- a/src/hotspot/share/utilities/macros.hpp -+++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ - - #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) - -+#if defined(RISCV32) || defined(RISCV64) -+#define RISCV -+#define RISCV_ONLY(code) code -+#define NOT_RISCV(code) -+#else -+#undef RISCV -+#define RISCV_ONLY(code) -+#define NOT_RISCV(code) code -+#endif -+ -+#ifdef RISCV32 -+#define RISCV32_ONLY(code) code -+#define NOT_RISCV32(code) -+#else -+#define RISCV32_ONLY(code) -+#define NOT_RISCV32(code) code -+#endif -+ -+#ifdef RISCV64 -+#define RISCV64_ONLY(code) code -+#define NOT_RISCV64(code) -+#else -+#define RISCV64_ONLY(code) -+#define NOT_RISCV64(code) code -+#endif -+ - #ifdef VM_LITTLE_ENDIAN - #define LITTLE_ENDIAN_ONLY(code) code - #define BIG_ENDIAN_ONLY(code) -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c57..45a927fb5ee 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -58,6 +58,10 @@ - #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" - #endif - -+#ifdef riscv64 -+#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" -+#endif -+ - static jfieldID p_ps_prochandle_ID = 0; - static jfieldID threadList_ID = 0; - static jfieldID loadObjectList_ID = 0; -@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - return (err == PS_OK)? array : 0; - } - --#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) - JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 - (JNIEnv *env, jobject this_obj, jint lwp_id) { - -@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG - #endif -+#ifdef riscv64 -+#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG -+#endif - #if defined(ppc64) || defined(ppc64le) - #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG - #endif -@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - } - #endif /* aarch64 */ - -+#if defined(riscv64) -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg -+ -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ -+#endif /* riscv64 */ -+ - #if defined(ppc64) || defined(ppc64le) - #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e0213..ab092d4ee33 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -43,6 +43,8 @@ - #elif defined(arm) - #include - #define user_regs_struct pt_regs -+#elif defined(riscv64) -+#include - #endif - - // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c73..9bff9ee9b15 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -36,6 +36,7 @@ - import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; - import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; - import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; -+import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; - import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; - import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { - } else { - machDesc = new MachineDescriptionSPARC32Bit(); - } -+ } else if (cpu.equals("riscv64")) { -+ machDesc = new MachineDescriptionRISCV64(); - } else { - try { - machDesc = (MachineDescription) -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java -new file mode 100644 -index 00000000000..a972516dee3 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.debugger; -+ -+public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { -+ public long getAddressSize() { -+ return 8; -+ } -+ -+ public boolean isLP64() { -+ return true; -+ } -+ -+ public boolean isBigEndian() { -+ return false; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb7141..dc0bcb3da94 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, Red Hat Inc. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * -@@ -34,12 +34,14 @@ - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; - import sun.jvm.hotspot.debugger.sparc.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.linux.x86.*; - import sun.jvm.hotspot.debugger.linux.amd64.*; - import sun.jvm.hotspot.debugger.linux.sparc.*; - import sun.jvm.hotspot.debugger.linux.ppc64.*; - import sun.jvm.hotspot.debugger.linux.aarch64.*; -+import sun.jvm.hotspot.debugger.linux.riscv64.*; - import sun.jvm.hotspot.utilities.*; - - class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { - Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); - if (pc == null) return null; - return new LinuxAARCH64CFrame(dbg, fp, pc); -- } else { -+ } else if (cpu.equals("riscv64")) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (fp == null) return null; -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ if (pc == null) return null; -+ return new LinuxRISCV64CFrame(dbg, fp, pc); -+ } else { - // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu - ThreadContext context = (ThreadContext) thread.getContext(); - return context.getTopFrame(dbg); -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -new file mode 100644 -index 00000000000..f06da24bd0e ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+import sun.jvm.hotspot.debugger.cdbg.basic.*; -+ -+public final class LinuxRISCV64CFrame extends BasicCFrame { -+ private static final int C_FRAME_LINK_OFFSET = -2; -+ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; -+ -+ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { -+ super(dbg.getCDebugger()); -+ this.fp = fp; -+ this.pc = pc; -+ this.dbg = dbg; -+ } -+ -+ // override base class impl to avoid ELF parsing -+ public ClosestSymbol closestSymbolToPC() { -+ // try native lookup in debugger. -+ return dbg.lookup(dbg.getAddressValue(pc())); -+ } -+ -+ public Address pc() { -+ return pc; -+ } -+ -+ public Address localVariableBase() { -+ return fp; -+ } -+ -+ public CFrame sender(ThreadProxy thread) { -+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); -+ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ -+ if ((fp == null) || fp.lessThan(rsp)) { -+ return null; -+ } -+ -+ // Check alignment of fp -+ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { -+ return null; -+ } -+ -+ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); -+ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { -+ return null; -+ } -+ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); -+ if (nextPC == null) { -+ return null; -+ } -+ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); -+ } -+ -+ // package/class internals only -+ private static final int ADDRESS_SIZE = 8; -+ private Address pc; -+ private Address sp; -+ private Address fp; -+ private LinuxDebugger dbg; -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java -new file mode 100644 -index 00000000000..fdb841ccf3d ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.linux.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.linux.*; -+ -+public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { -+ private LinuxDebugger debugger; -+ -+ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { -+ super(); -+ this.debugger = debugger; -+ } -+ -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } -+ -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java -new file mode 100644 -index 00000000000..96d5dee47ce ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java -@@ -0,0 +1,88 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; -+import sun.jvm.hotspot.utilities.*; -+ -+public class ProcRISCV64Thread implements ThreadProxy { -+ private ProcDebugger debugger; -+ private int id; -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { -+ this.debugger = debugger; -+ -+ // FIXME: the size here should be configurable. However, making it -+ // so would produce a dependency on the "types" package from the -+ // debugger package, which is not desired. 
-+ this.id = (int) addr.getCIntegerAt(0, 4, true); -+ } -+ -+ public ProcRISCV64Thread(ProcDebugger debugger, long id) { -+ this.debugger = debugger; -+ this.id = (int) id; -+ } -+ -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); -+ long[] regs = debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; -+ } -+ -+ public boolean canSetContext() throws DebuggerException { -+ return false; -+ } -+ -+ public void setContext(ThreadContext context) -+ throws IllegalThreadStateException, DebuggerException { -+ throw new DebuggerException("Unimplemented"); -+ } -+ -+ public String toString() { -+ return "t@" + id; -+ } -+ -+ public boolean equals(Object obj) { -+ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { -+ return false; -+ } -+ -+ return (((ProcRISCV64Thread) obj).id == id); -+ } -+ -+ public int hashCode() { -+ return id; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -new file mode 100644 -index 00000000000..f2aa845e665 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.proc.*; -+ -+public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { -+ private ProcDebugger debugger; -+ -+ public ProcRISCV64ThreadContext(ProcDebugger debugger) { -+ super(); -+ this.debugger = debugger; -+ } -+ -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } -+ -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java -new file mode 100644 -index 00000000000..19f64b8ce2d ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.proc.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.proc.*; -+ -+public class ProcRISCV64ThreadFactory implements ProcThreadFactory { -+ private ProcDebugger debugger; -+ -+ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { -+ this.debugger = debugger; -+ } -+ -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); -+ } -+ -+ public ThreadProxy createThreadWrapper(long id) { -+ return new ProcRISCV64Thread(debugger, id); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java -new file mode 100644 -index 00000000000..aecbda59023 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. 
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.remote.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; -+import sun.jvm.hotspot.utilities.*; -+ -+public class RemoteRISCV64Thread extends RemoteThread { -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { -+ super(debugger, addr); -+ } -+ -+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { -+ super(debugger, id); -+ } -+ -+ public ThreadContext getContext() throws IllegalThreadStateException { -+ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); -+ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : -+ debugger.getThreadIntegerRegisterSet(id); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); -+ } -+ for (int i = 0; i < regs.length; i++) { -+ context.setRegister(i, regs[i]); -+ } -+ return context; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -new file mode 100644 -index 00000000000..1d3da6be5af ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.remote.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.debugger.remote.*; -+ -+public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { -+ private RemoteDebuggerClient debugger; -+ -+ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { -+ super(); -+ this.debugger = debugger; -+ } -+ -+ public void setRegisterAsAddress(int index, Address value) { -+ setRegister(index, debugger.getAddressValue(value)); -+ } -+ -+ public Address getRegisterAsAddress(int index) { -+ return debugger.newAddress(getRegister(index)); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java -new file mode 100644 -index 00000000000..725b94e25a3 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.remote.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.remote.*; -+ -+public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { -+ private RemoteDebuggerClient debugger; -+ -+ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { -+ this.debugger = debugger; -+ } -+ -+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { -+ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); -+ } -+ -+ public ThreadProxy createThreadWrapper(long id) { -+ return new RemoteRISCV64Thread(debugger, id); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java -new file mode 100644 -index 00000000000..fb60a70427a ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.debugger.riscv64; -+ -+import java.lang.annotation.Native; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.cdbg.*; -+ -+/** Specifies the thread context on riscv64 platforms; only a sub-portion -+ * of the context is guaranteed to be present on all operating -+ * systems. */ -+ -+public abstract class RISCV64ThreadContext implements ThreadContext { -+ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. -+ -+ // /* -+ // * Signal context structure - contains all info to do with the state -+ // * before the signal handler was invoked. 
-+ // */ -+ // struct sigcontext { -+ // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; -+ // }; -+ // -+ // struct user_regs_struct { -+ // unsigned long pc; -+ // unsigned long ra; -+ // unsigned long sp; -+ // unsigned long gp; -+ // unsigned long tp; -+ // unsigned long t0; -+ // unsigned long t1; -+ // unsigned long t2; -+ // unsigned long s0; -+ // unsigned long s1; -+ // unsigned long a0; -+ // unsigned long a1; -+ // unsigned long a2; -+ // unsigned long a3; -+ // unsigned long a4; -+ // unsigned long a5; -+ // unsigned long a6; -+ // unsigned long a7; -+ // unsigned long s2; -+ // unsigned long s3; -+ // unsigned long s4; -+ // unsigned long s5; -+ // unsigned long s6; -+ // unsigned long s7; -+ // unsigned long s8; -+ // unsigned long s9; -+ // unsigned long s10; -+ // unsigned long s11; -+ // unsigned long t3; -+ // unsigned long t4; -+ // unsigned long t5; -+ // unsigned long t6; -+ // }; -+ -+ // NOTE: the indices for the various registers must be maintained as -+ // listed across various operating systems. However, only a small -+ // subset of the registers' values are guaranteed to be present (and -+ // must be present for the SA's stack walking to work) -+ -+ // One instance of the Native annotation is enough to trigger header generation -+ // for this file. -+ @Native -+ public static final int R0 = 0; -+ public static final int R1 = 1; -+ public static final int R2 = 2; -+ public static final int R3 = 3; -+ public static final int R4 = 4; -+ public static final int R5 = 5; -+ public static final int R6 = 6; -+ public static final int R7 = 7; -+ public static final int R8 = 8; -+ public static final int R9 = 9; -+ public static final int R10 = 10; -+ public static final int R11 = 11; -+ public static final int R12 = 12; -+ public static final int R13 = 13; -+ public static final int R14 = 14; -+ public static final int R15 = 15; -+ public static final int R16 = 16; -+ public static final int R17 = 17; -+ public static final int R18 = 18; -+ public static final int R19 = 19; -+ public static final int R20 = 20; -+ public static final int R21 = 21; -+ public static final int R22 = 22; -+ public static final int R23 = 23; -+ public static final int R24 = 24; -+ public static final int R25 = 25; -+ public static final int R26 = 26; -+ public static final int R27 = 27; -+ public static final int R28 = 28; -+ public static final int R29 = 29; -+ public static final int R30 = 30; -+ public static final int R31 = 31; -+ -+ public static final int NPRGREG = 32; -+ -+ public static final int PC = R0; -+ public static final int LR = R1; -+ public static final int SP = R2; -+ public static final int FP = R8; -+ -+ private long[] data; -+ -+ public RISCV64ThreadContext() { -+ data = new long[NPRGREG]; -+ } -+ -+ public int getNumRegisters() { -+ return NPRGREG; -+ } -+ -+ public String getRegisterName(int index) { -+ switch (index) { -+ case LR: return "lr"; -+ case SP: return "sp"; -+ case PC: return "pc"; -+ default: -+ return "r" + index; -+ } -+ } -+ -+ public void setRegister(int index, long value) { -+ data[index] = value; -+ } -+ -+ public long getRegister(int index) { -+ return data[index]; -+ } -+ -+ public CFrame getTopFrame(Debugger dbg) { -+ return null; -+ } -+ -+ /** This can't be implemented in this class since we would have to -+ * tie the implementation to, for example, the debugging system */ -+ public abstract void setRegisterAsAddress(int index, Address value); -+ -+ /** This can't be implemented in this class since we would have to -+ * 
tie the implementation to, for example, the debugging system */ -+ public abstract Address getRegisterAsAddress(int index); -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785a7..89d676fe3b9 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -38,6 +38,7 @@ - import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; -+import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; - import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; - import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { - access = new LinuxPPC64JavaThreadPDAccess(); - } else if (cpu.equals("aarch64")) { - access = new LinuxAARCH64JavaThreadPDAccess(); -+ } else if (cpu.equals("riscv64")) { -+ access = new LinuxRISCV64JavaThreadPDAccess(); - } else { - try { - access = (JavaThreadPDAccess) -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -new file mode 100644 -index 00000000000..f2e224f28ee ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,134 @@ -+/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.linux_riscv64; -+ -+import java.io.*; -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; -+ -+public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { -+ private static AddressField lastJavaFPField; -+ private static AddressField osThreadField; -+ -+ // Field from OSThread -+ private static CIntegerField osThreadThreadIDField; -+ -+ // This is currently unneeded but is being kept in case we change -+ // the currentFrameGuess algorithm -+ private static final long GUESS_SCAN_RANGE = 128 * 1024; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaThread"); -+ osThreadField = type.getAddressField("_osthread"); -+ -+ Type anchorType = db.lookupType("JavaFrameAnchor"); -+ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); -+ -+ Type osThreadType = db.lookupType("OSThread"); -+ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); -+ } -+ -+ public Address getLastJavaFP(Address addr) { -+ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); -+ } -+ -+ public Address getLastJavaPC(Address addr) { -+ return null; -+ } -+ -+ public Address getBaseOfStackPointer(Address addr) { -+ return null; -+ } -+ -+ public Frame getLastFramePD(JavaThread thread, Address addr) { -+ Address fp = thread.getLastJavaFP(); -+ if (fp == null) { -+ return null; // no information -+ } -+ return new RISCV64Frame(thread.getLastJavaSP(), fp); -+ } -+ -+ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { -+ return new RISCV64RegisterMap(thread, updateMap); -+ } -+ -+ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); -+ if (!guesser.run(GUESS_SCAN_RANGE)) { -+ return null; -+ } -+ if (guesser.getPC() == null) { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); -+ } else { -+ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); -+ } -+ } -+ -+ public void printThreadIDOn(Address addr, PrintStream tty) { -+ tty.print(getThreadProxy(addr)); -+ } -+ -+ public void printInfoOn(Address threadAddr, PrintStream tty) { -+ tty.print("Thread id: "); -+ printThreadIDOn(threadAddr, tty); -+ } -+ -+ public Address getLastSP(Address addr) { -+ ThreadProxy t = getThreadProxy(addr); -+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); -+ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ } -+ -+ public ThreadProxy getThreadProxy(Address addr) { -+ // Addr is the address of the JavaThread. 
-+ // Fetch the OSThread (for now and for simplicity, not making a -+ // separate "OSThread" class in this package) -+ Address osThreadAddr = osThreadField.getValue(addr); -+ // Get the address of the _thread_id from the OSThread -+ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); -+ -+ JVMDebugger debugger = VM.getVM().getDebugger(); -+ return debugger.getThreadForIdentifierAddress(threadIdAddr); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java -new file mode 100644 -index 00000000000..34701c6922f ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java -@@ -0,0 +1,223 @@ -+/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.debugger.riscv64.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.interpreter.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.runtime.riscv64.*; -+ -+/**
-+ Should be able to be used on all riscv64 platforms we support
-+ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()"
-+ functionality. Input is an RISCV64ThreadContext; output is SP, FP,
-+ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is
-+ left to the caller, since we may need to subclass RISCV64Frame to
-+ support signal handler frames on Unix platforms.
-+
-+ Algorithm is to walk up the stack within a given range (say,
-+ 512K at most) looking for a plausible PC and SP for a Java frame,
-+ also considering those coming in from the context. If we find a PC
-+ that belongs to the VM (i.e., in generated code like the
-+ interpreter or CodeCache) then we try to find an associated FP.
-+ We repeat this until we either find a complete frame or run out of
-+ stack to look at.
*/ -+ -+public class RISCV64CurrentFrameGuess { -+ private RISCV64ThreadContext context; -+ private JavaThread thread; -+ private Address spFound; -+ private Address fpFound; -+ private Address pcFound; -+ -+ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") -+ != null; -+ -+ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, -+ JavaThread thread) { -+ this.context = context; -+ this.thread = thread; -+ } -+ -+ /** Returns false if not able to find a frame within a reasonable range. */ -+ public boolean run(long regionInBytesToSearch) { -+ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); -+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); -+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); -+ if (sp == null) { -+ // Bail out if no last java frame either -+ if (thread.getLastJavaSP() != null) { -+ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); -+ return true; -+ } -+ return false; -+ } -+ Address end = sp.addOffsetTo(regionInBytesToSearch); -+ VM vm = VM.getVM(); -+ -+ setValues(null, null, null); // Assume we're not going to find anything -+ -+ if (vm.isJavaPCDbg(pc)) { -+ if (vm.isClientCompiler()) { -+ // If the topmost frame is a Java frame, we are (pretty much) -+ // guaranteed to have a viable FP. We should be more robust -+ // than this (we have the potential for losing entire threads' -+ // stack traces) but need to see how much work we really have -+ // to do here. Searching the stack for an (SP, FP) pair is -+ // hard since it's easy to misinterpret inter-frame stack -+ // pointers as base-of-frame pointers; we also don't know the -+ // sizes of C1 frames (not registered in the nmethod) so can't -+ // derive them from SP. -+ -+ setValues(sp, fp, pc); -+ return true; -+ } else { -+ if (vm.getInterpreter().contains(pc)) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + -+ sp + ", fp = " + fp + ", pc = " + pc); -+ } -+ setValues(sp, fp, pc); -+ return true; -+ } -+ -+ // For the server compiler, FP is not guaranteed to be valid -+ // for compiled code. In addition, an earlier attempt at a -+ // non-searching algorithm (see below) failed because the -+ // stack pointer from the thread context was pointing -+ // (considerably) beyond the ostensible end of the stack, into -+ // garbage; walking from the topmost frame back caused a crash. -+ // -+ // This algorithm takes the current PC as a given and tries to -+ // find the correct corresponding SP by walking up the stack -+ // and repeatedly performing stackwalks (very inefficient). -+ // -+ // FIXME: there is something wrong with stackwalking across -+ // adapter frames...this is likely to be the root cause of the -+ // failure with the simpler algorithm below. -+ -+ for (long offset = 0; -+ offset < regionInBytesToSearch; -+ offset += vm.getAddressSize()) { -+ try { -+ Address curSP = sp.addOffsetTo(offset); -+ Frame frame = new RISCV64Frame(curSP, null, pc); -+ RegisterMap map = thread.newRegisterMap(false); -+ while (frame != null) { -+ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { -+ // We were able to traverse all the way to the -+ // bottommost Java frame. -+ // This sp looks good. Keep it. 
-+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); -+ } -+ setValues(curSP, null, pc); -+ return true; -+ } -+ frame = frame.sender(map); -+ } -+ } catch (Exception e) { -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); -+ } -+ // Bad SP. Try another. -+ } -+ } -+ -+ // Were not able to find a plausible SP to go with this PC. -+ // Bail out. -+ return false; -+ } -+ } else { -+ // If the current program counter was not known to us as a Java -+ // PC, we currently assume that we are in the run-time system -+ // and attempt to look to thread-local storage for saved SP and -+ // FP. Note that if these are null (because we were, in fact, -+ // in Java code, i.e., vtable stubs or similar, and the SA -+ // didn't have enough insight into the target VM to understand -+ // that) then we are going to lose the entire stack trace for -+ // the thread, which is sub-optimal. FIXME. -+ -+ if (DEBUG) { -+ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + -+ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); -+ } -+ if (thread.getLastJavaSP() == null) { -+ return false; // No known Java frames on stack -+ } -+ -+ // The runtime has a nasty habit of not saving fp in the frame -+ // anchor, leaving us to grovel about in the stack to find a -+ // plausible address. Fortunately, this only happens in -+ // compiled code; there we always have a valid PC, and we always -+ // push LR and FP onto the stack as a pair, with FP at the lower -+ // address. -+ pc = thread.getLastJavaPC(); -+ fp = thread.getLastJavaFP(); -+ sp = thread.getLastJavaSP(); -+ -+ if (fp == null) { -+ CodeCache cc = vm.getCodeCache(); -+ if (cc.contains(pc)) { -+ CodeBlob cb = cc.findBlob(pc); -+ if (DEBUG) { -+ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); -+ } -+ // See if we can derive a frame pointer from SP and PC -+ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); -+ if (link_offset >= 0) { -+ fp = sp.addOffsetTo(link_offset); -+ } -+ } -+ } -+ -+ // We found a PC in the frame anchor. Check that it's plausible, and -+ // if it is, use it. -+ if (vm.isJavaPCDbg(pc)) { -+ setValues(sp, fp, pc); -+ } else { -+ setValues(sp, fp, null); -+ } -+ -+ return true; -+ } -+ } -+ -+ public Address getSP() { return spFound; } -+ public Address getFP() { return fpFound; } -+ /** May be null if getting values from thread-local storage; take -+ care to call the correct RISCV64Frame constructor to recover this if -+ necessary */ -+ public Address getPC() { return pcFound; } -+ -+ private void setValues(Address sp, Address fp, Address pc) { -+ spFound = sp; -+ fpFound = fp; -+ pcFound = pc; -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -new file mode 100644 -index 00000000000..df280005d72 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,556 @@ -+/* -+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.code.*; -+import sun.jvm.hotspot.compiler.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.oops.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; -+ -+/** Specialization of and implementation of abstract methods of the -+ Frame class for the riscv64 family of CPUs. */ -+ -+public class RISCV64Frame extends Frame { -+ private static final boolean DEBUG; -+ static { -+ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; -+ } -+ -+ // Java frames -+ private static final int LINK_OFFSET = -2; -+ private static final int RETURN_ADDR_OFFSET = -1; -+ private static final int SENDER_SP_OFFSET = 0; -+ -+ // Interpreter frames -+ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; -+ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; -+ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; -+ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only -+ private static int INTERPRETER_FRAME_PADDING_OFFSET; -+ private static int INTERPRETER_FRAME_MIRROR_OFFSET; -+ private static int INTERPRETER_FRAME_CACHE_OFFSET; -+ private static int INTERPRETER_FRAME_LOCALS_OFFSET; -+ private static int INTERPRETER_FRAME_BCX_OFFSET; -+ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; -+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; -+ -+ // Entry frames -+ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; -+ -+ // Native frames -+ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; -+ -+ private static VMReg fp = new VMReg(8); -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; -+ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; -+ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; -+ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; -+ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; -+ 
INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; -+ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; -+ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; -+ } -+ -+ -+ // an additional field beyond sp and pc: -+ Address raw_fp; // frame pointer -+ private Address raw_unextendedSP; -+ -+ private RISCV64Frame() { -+ } -+ -+ private void adjustForDeopt() { -+ if ( pc != null) { -+ // Look for a deopt pc and if it is deopted convert to original pc -+ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); -+ if (cb != null && cb.isJavaMethod()) { -+ NMethod nm = (NMethod) cb; -+ if (pc.equals(nm.deoptHandlerBegin())) { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); -+ } -+ // adjust pc if frame is deoptimized. -+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); -+ deoptimized = true; -+ } -+ } -+ } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp, pc): " + this); -+ dumpStack(); -+ } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_fp) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_sp; -+ this.raw_fp = raw_fp; -+ -+ // We cannot assume SP[-1] always contains a valid return PC (e.g. if -+ // the callee is a C/C++ compiled frame). If the PC is not known to -+ // Java then this.pc is null. -+ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ if (VM.getVM().isJavaPCDbg(savedPC)) { -+ this.pc = savedPC; -+ } -+ -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, fp): " + this); -+ dumpStack(); -+ } -+ } -+ -+ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { -+ this.raw_sp = raw_sp; -+ this.raw_unextendedSP = raw_unextendedSp; -+ this.raw_fp = raw_fp; -+ this.pc = pc; -+ adjustUnextendedSP(); -+ -+ // Frame must be fully constructed before this call -+ adjustForDeopt(); -+ -+ if (DEBUG) { -+ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); -+ dumpStack(); -+ } -+ -+ } -+ -+ public Object clone() { -+ RISCV64Frame frame = new RISCV64Frame(); -+ frame.raw_sp = raw_sp; -+ frame.raw_unextendedSP = raw_unextendedSP; -+ frame.raw_fp = raw_fp; -+ frame.pc = pc; -+ frame.deoptimized = deoptimized; -+ return frame; -+ } -+ -+ public boolean equals(Object arg) { -+ if (arg == null) { -+ return false; -+ } -+ -+ if (!(arg instanceof RISCV64Frame)) { -+ return false; -+ } -+ -+ RISCV64Frame other = (RISCV64Frame) arg; -+ -+ return (AddressOps.equal(getSP(), other.getSP()) && -+ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && -+ AddressOps.equal(getFP(), other.getFP()) && -+ AddressOps.equal(getPC(), other.getPC())); -+ } -+ -+ public int hashCode() { -+ if (raw_sp == null) { -+ return 0; -+ } -+ -+ return raw_sp.hashCode(); -+ } -+ -+ public String toString() { -+ return "sp: " + (getSP() == null? "null" : getSP().toString()) + -+ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + -+ ", fp: " + (getFP() == null? 
"null" : getFP().toString()) + -+ ", pc: " + (pc == null? "null" : pc.toString()); -+ } -+ -+ // accessors for the instance variables -+ public Address getFP() { return raw_fp; } -+ public Address getSP() { return raw_sp; } -+ public Address getID() { return raw_sp; } -+ -+ // FIXME: not implemented yet -+ public boolean isSignalHandlerFrameDbg() { return false; } -+ public int getSignalNumberDbg() { return 0; } -+ public String getSignalNameDbg() { return null; } -+ -+ public boolean isInterpretedFrameValid() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); -+ } -+ -+ // These are reasonable sanity checks -+ if (getFP() == null || getFP().andWithMask(0x3) != null) { -+ return false; -+ } -+ -+ if (getSP() == null || getSP().andWithMask(0x3) != null) { -+ return false; -+ } -+ -+ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { -+ return false; -+ } -+ -+ // These are hacks to keep us out of trouble. -+ // The problem with these is that they mask other problems -+ if (getFP().lessThanOrEqual(getSP())) { -+ // this attempts to deal with unsigned comparison above -+ return false; -+ } -+ -+ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { -+ // stack frames shouldn't be large. -+ return false; -+ } -+ -+ return true; -+ } -+ -+ public Frame sender(RegisterMap regMap, CodeBlob cb) { -+ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; -+ -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ -+ // Default is we done have to follow them. The sender_for_xxx will -+ // update it accordingly -+ map.setIncludeArgumentOops(false); -+ -+ if (isEntryFrame()) return senderForEntryFrame(map); -+ if (isInterpretedFrame()) return senderForInterpreterFrame(map); -+ -+ if(cb == null) { -+ cb = VM.getVM().getCodeCache().findBlob(getPC()); -+ } else { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); -+ } -+ } -+ -+ if (cb != null) { -+ return senderForCompiledFrame(map, cb); -+ } -+ -+ // Must be native-compiled frame, i.e. the marshaling code for native -+ // methods that exists in the core system. 
-+ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); -+ } -+ -+ private Frame senderForEntryFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForEntryFrame"); -+ } -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ // Java frame called from C; skip all C frames and return top C -+ // frame of that chunk as the sender -+ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); -+ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); -+ } -+ RISCV64Frame fr; -+ if (jcw.getLastJavaPC() != null) { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); -+ } else { -+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); -+ } -+ map.clear(); -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); -+ } -+ return fr; -+ } -+ -+ //------------------------------------------------------------------------------ -+ // frame::adjust_unextended_sp -+ private void adjustUnextendedSP() { -+ // If we are returning to a compiled MethodHandle call site, the -+ // saved_fp will in fact be a saved value of the unextended SP. The -+ // simplest way to tell whether we are returning to such a call site -+ // is as follows: -+ -+ CodeBlob cb = cb(); -+ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); -+ if (senderNm != null) { -+ // If the sender PC is a deoptimization point, get the original -+ // PC. For MethodHandle call site the unextended_sp is stored in -+ // saved_fp. -+ if (senderNm.isDeoptMhEntry(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ else if (senderNm.isDeoptEntry(getPC())) { -+ } -+ else if (senderNm.isMethodHandleReturn(getPC())) { -+ raw_unextendedSP = getFP(); -+ } -+ } -+ } -+ -+ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { -+ if (DEBUG) { -+ System.out.println("senderForInterpreterFrame"); -+ } -+ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); -+ // We do not need to update the callee-save register mapping because above -+ // us is either another interpreter frame or a converter-frame, but never -+ // directly a compiled frame. -+ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. -+ // However c2 no longer uses callee save register for java calls so there -+ // are no callee register to find. 
-+ -+ if (map.getUpdateMap()) -+ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); -+ -+ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); -+ } -+ -+ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { -+ map.setLocation(fp, savedFPAddr); -+ } -+ -+ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { -+ if (DEBUG) { -+ System.out.println("senderForCompiledFrame"); -+ } -+ -+ // -+ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess -+ // -+ -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(map != null, "map must be set"); -+ } -+ -+ // frame owned by optimizing compiler -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); -+ } -+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); -+ -+ // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); -+ -+ // This is the saved value of FP which may or may not really be an FP. -+ // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); -+ -+ if (map.getUpdateMap()) { -+ // Tell GC to use argument oopmaps for some runtime stubs that need it. -+ // For C1, the runtime stub might not have oop maps, so set this flag -+ // outside of update_register_map. -+ map.setIncludeArgumentOops(cb.callerMustGCArguments()); -+ -+ if (cb.getOopMaps() != null) { -+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); -+ } -+ -+ // Since the prolog does the save and restore of FP there is no oopmap -+ // for it so we must fill in its location as if there was an oopmap entry -+ // since if our caller was compiled code there could be live jvm state in it. -+ updateMapWithSavedLink(map, savedFPAddr); -+ } -+ -+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); -+ } -+ -+ protected boolean hasSenderPD() { -+ return true; -+ } -+ -+ public long frameSize() { -+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); -+ } -+ -+ public Address getLink() { -+ try { -+ if (DEBUG) { -+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) -+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); -+ } -+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); -+ } catch (Exception e) { -+ if (DEBUG) -+ System.out.println("Returning null"); -+ return null; -+ } -+ } -+ -+ public Address getUnextendedSP() { return raw_unextendedSP; } -+ -+ // Return address: -+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } -+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } -+ -+ // return address of param, zero origin index. -+ public Address getNativeParamAddr(int idx) { -+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); -+ } -+ -+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } -+ -+ public Address addressOfInterpreterFrameLocals() { -+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); -+ } -+ -+ private Address addressOfInterpreterFrameBCX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); -+ } -+ -+ public int getInterpreterFrameBCI() { -+ // FIXME: this is not atomic with respect to GC and is unsuitable -+ // for use in a non-debugging, or reflective, system. Need to -+ // figure out how to express this. 
-+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); -+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); -+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); -+ return bcpToBci(bcp, method); -+ } -+ -+ public Address addressOfInterpreterFrameMDX() { -+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); -+ } -+ -+ // expression stack -+ // (the max_stack arguments are used by the GC; see class FrameClosure) -+ -+ public Address addressOfInterpreterFrameExpressionStack() { -+ Address monitorEnd = interpreterFrameMonitorEnd().address(); -+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); -+ } -+ -+ public int getInterpreterFrameExpressionStackDirection() { return -1; } -+ -+ // top of expression stack -+ public Address addressOfInterpreterFrameTOS() { -+ return getSP(); -+ } -+ -+ /** Expression stack from top down */ -+ public Address addressOfInterpreterFrameTOSAt(int slot) { -+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); -+ } -+ -+ public Address getInterpreterFrameSenderSP() { -+ if (Assert.ASSERTS_ENABLED) { -+ Assert.that(isInterpretedFrame(), "interpreted frame expected"); -+ } -+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); -+ } -+ -+ // Monitors -+ public BasicObjectLock interpreterFrameMonitorBegin() { -+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); -+ } -+ -+ public BasicObjectLock interpreterFrameMonitorEnd() { -+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); -+ if (Assert.ASSERTS_ENABLED) { -+ // make sure the pointer points inside the frame -+ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); -+ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); -+ } -+ return new BasicObjectLock(result); -+ } -+ -+ public int interpreterFrameMonitorSize() { -+ return BasicObjectLock.size(); -+ } -+ -+ // Method -+ public Address addressOfInterpreterFrameMethod() { -+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); -+ } -+ -+ // Constant pool cache -+ public Address addressOfInterpreterFrameCPCache() { -+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); -+ } -+ -+ // Entry frames -+ public JavaCallWrapper getEntryFrameCallWrapper() { -+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); -+ } -+ -+ protected Address addressOfSavedOopResult() { -+ // offset is 2 for compiler2 and 3 for compiler1 -+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * -+ VM.getVM().getAddressSize()); -+ } -+ -+ protected Address addressOfSavedReceiver() { -+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ } -+ -+ private void dumpStack() { -+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); -+ AddressOps.lt(addr, getSP()); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ System.out.println("-----------------------"); -+ for (Address addr = getSP(); -+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); -+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { -+ System.out.println(addr + ": " + addr.getAddressAt(0)); -+ } -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -new file mode 100644 -index 00000000000..d0ad2b559a6 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; -+import sun.jvm.hotspot.utilities.*; -+import sun.jvm.hotspot.utilities.Observable; -+import sun.jvm.hotspot.utilities.Observer; -+ -+public class RISCV64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; -+ -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } -+ -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); -+ -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } -+ -+ public RISCV64JavaCallWrapper(Address addr) { -+ super(addr); -+ } -+ -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -new file mode 100644 -index 00000000000..4aeb1c6f557 ---- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+package sun.jvm.hotspot.runtime.riscv64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class RISCV64RegisterMap extends RegisterMap { -+ -+ /** This is the only public constructor */ -+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } -+ -+ protected RISCV64RegisterMap(RegisterMap map) { -+ super(map); -+ } -+ -+ public Object clone() { -+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); -+ return retval; -+ } -+ -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107cab..6552ce255fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { - - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; - - for(String s : KNOWN) { - if(s.equals(cpu)) -diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c28a..823b9f39dbf 100644 ---- a/test/hotspot/jtreg/compiler/c2/TestBit.java -+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +34,7 @@ - * - * @run driver compiler.c2.TestBit - * -- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" -+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" - * @requires vm.debug == true & vm.compiler2.enabled - */ - public class TestBit { -@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { - String expectedTestBitInstruction = - "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : - "aarch64".equals(System.getProperty("os.arch")) ? "tb" : -- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; -+ "amd64".equals(System.getProperty("os.arch")) ? "test" : -+ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; - - if (expectedTestBitInstruction != null) { - output.shouldContain(expectedTestBitInstruction); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f0b..55374b116e6 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a99..8fb82ee4531 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309dae..aca32137eda 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -42,6 +42,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366bae..8deac4f7895 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +41,7 @@ - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; - import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; - import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; - -@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForUnsupportedAArch64CPU( - SHAOptionsBase.USE_SHA_OPTION), -+ new GenericTestCaseForUnsupportedRISCV64CPU( -+ SHAOptionsBase.USE_SHA_OPTION), - new UseSHASpecificTestCaseForUnsupportedCPU( - SHAOptionsBase.USE_SHA_OPTION), - new GenericTestCaseForOtherCPU( -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae67..26635002040 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +32,27 @@ - - /** - * Generic test case for SHA-related options targeted to any CPU except -- * AArch64, PPC, S390x, SPARC and X86. -+ * AArch64, RISCV64, PPC, S390x, SPARC and X86. - */ - public class GenericTestCaseForOtherCPU extends - SHAOptionsBase.TestCase { - public GenericTestCaseForOtherCPU(String optionName) { -- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. -+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. - super(optionName, new NotPredicate( - new OrPredicate(Platform::isAArch64, -+ new OrPredicate(Platform::isRISCV64, - new OrPredicate(Platform::isS390x, - new OrPredicate(Platform::isSparc, - new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, -- Platform::isX86))))))); -+ Platform::isX86)))))))); - } - - @Override - protected void verifyWarnings() throws Throwable { - String shouldPassMessage = String.format("JVM should start with " - + "option '%s' without any warnings", optionName); -- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of - // SHA-related options will not cause any warnings. 
- CommandLineOptionTest.verifySameJVMStartup(null, - new String[] { ".*" + optionName + ".*" }, shouldPassMessage, -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -new file mode 100644 -index 00000000000..2ecfec07a4c ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,115 @@ -+/* -+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+ -+package compiler.intrinsics.sha.cli.testcases; -+ -+import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import jdk.test.lib.process.ExitCode; -+import jdk.test.lib.Platform; -+import jdk.test.lib.cli.CommandLineOptionTest; -+import jdk.test.lib.cli.predicate.AndPredicate; -+import jdk.test.lib.cli.predicate.NotPredicate; -+ -+/** -+ * Generic test case for SHA-related options targeted to RISCV64 CPUs -+ * which don't support instruction required by the tested option. -+ */ -+public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ DigestOptionsBase.TestCase { -+ -+ final private boolean checkUseSHA; -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { -+ this(optionName, true); -+ } -+ -+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { -+ super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ optionName)))); -+ -+ this.checkUseSHA = checkUseSHA; -+ } -+ -+ @Override -+ protected void verifyWarnings() throws Throwable { -+ String shouldPassMessage = String.format("JVM startup should pass with" -+ + "option '-XX:-%s' without any warnings", optionName); -+ //Verify that option could be disabled without any warnings. 
-+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); -+ -+ if (checkUseSHA) { -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ DigestOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ } -+ } -+ } -+ -+ @Override -+ protected void verifyOptionValues() throws Throwable { -+ // Verify that option is disabled by default. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be disabled by default", -+ optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ -+ if (checkUseSHA) { -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ -+ // Verify that option is disabled when +UseSHA was passed to JVM. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)), -+ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ DigestOptionsBase.USE_SHA_OPTION, true)); -+ } -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a65..7be8af6d035 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e4327..797927b42bf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf37464..be8f7d586c2 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f16..d96d5e29c00 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions - * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b5161..b09c873d05d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528b7..fe40ed6f98d 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8138583 - * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test -- * @requires os.arch=="aarch64" -+ * @requires os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224ef..51631910493 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8135028 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c988004..d999ae423cf 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95aed3..65912a5c7fa 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb23..fffdc2f7565 100644 ---- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -25,7 +25,7 @@ - * @test - * @bug 8074981 - * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test -- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" -+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" - * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 - * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb5f..7afe3560f30 100644 ---- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - - public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), - // x86 variants - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), -- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); -+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); - - public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), - new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -79,10 +81,11 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), - new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), - new OrPredicate(new 
CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), -@@ -92,7 +95,7 @@ public class IntrinsicPredicates { - new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), - new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), -- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); -+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - - public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE - = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, -diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a32..d4d43b01ae6 100644 ---- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { - // It's ok for ARM not to have symbols, because it does not support NMT detail - // when targeting thumb2. It's also ok for Windows not to have symbols, because - // they are only available if the symbols file is included with the build. -- if (Platform.isWindows() || Platform.isARM()) { -+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { - return; // we are done - } - output.reportDiagnosticSummary(); -diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abcd9..eab19273ad8 100644 ---- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { - return Platform.isAix() || - (Platform.isLinux() && - (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || -- Platform.isX86())) || -+ Platform.isX86() || Platform.isRISCV64())) || - Platform.isOSX() || - Platform.isSolaris(); - } -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index 54640b245f8..f0b7aed5ceb 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,5 +1,4 @@ - /* -- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b76..d4bfe31dd7a 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -45,7 +45,7 @@ - */ - public class TestMutuallyExclusivePlatformPredicates { - private static enum MethodGroup { -- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), - BITNESS("is32bit", "is64bit"), - OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), - VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1f6..abeff80e5e8 100644 ---- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { - Events.assertField(event, "hwThreads").atLeast(1); - Events.assertField(event, "cores").atLeast(1); - Events.assertField(event, "sockets").atLeast(1); -- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); - } - } - } -diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c70..635cdd18054 100644 ---- a/test/lib/jdk/test/lib/Platform.java -+++ b/test/lib/jdk/test/lib/Platform.java -@@ -202,6 +202,10 @@ public static boolean isARM() { - return isArch("arm.*"); - } - -+ public static boolean isRISCV64() { -+ return isArch("riscv64"); -+ } -+ - public static boolean isPPC() { - return isArch("ppc.*"); - } - -From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 28 Mar 2023 16:30:04 +0800 -Subject: [PATCH 002/140] Drop zgc part - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ - .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- - src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- - src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- - .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- - .../cpu/riscv/macroAssembler_riscv.hpp | 9 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - - 9 files changed, 1 insertion(+), 1093 deletions(-) - delete mode 100644 
src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 742c2126e60..bba3bd4709c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - if (UseCompressedOops && !wide) { - __ decode_heap_oop(dest->as_register()); - } -- -- if (!UseZGC) { -- // Load barrier has not yet been applied, so ZGC can't verify the oop here -- __ verify_oop(dest->as_register()); -- } -+ __ verify_oop(dest->as_register()); - } - } - -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -deleted file mode 100644 -index 3d3f4d4d774..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp -+++ /dev/null -@@ -1,441 +0,0 @@ --/* -- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.inline.hpp" --#include "code/codeBlob.hpp" --#include "code/vmreg.inline.hpp" --#include "gc/z/zBarrier.inline.hpp" --#include "gc/z/zBarrierSet.hpp" --#include "gc/z/zBarrierSetAssembler.hpp" --#include "gc/z/zBarrierSetRuntime.hpp" --#include "gc/z/zThreadLocalData.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "utilities/macros.hpp" --#ifdef COMPILER1 --#include "c1/c1_LIRAssembler.hpp" --#include "c1/c1_MacroAssembler.hpp" --#include "gc/z/c1/zBarrierSetC1.hpp" --#endif // COMPILER1 --#ifdef COMPILER2 --#include "gc/z/c2/zBarrierSetC2.hpp" --#endif // COMPILER2 -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#else --#define BLOCK_COMMENT(str) __ block_comment(str) --#endif -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread) { -- if (!ZBarrierSet::barrier_needed(decorators, type)) { -- // Barrier not needed -- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); -- return; -- } -- -- assert_different_registers(t1, src.base()); -- assert_different_registers(t0, t1, dst); -- -- Label done; -- -- // Load bad mask into temp register. -- __ la(t0, src); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ ld(dst, Address(t0)); -- -- // Test reference against bad mask. If mask bad, then we need to fix it up. -- __ andr(t1, dst, t1); -- __ beqz(t1, done); -- -- __ enter(); -- -- __ push_call_clobbered_registers_except(RegSet::of(dst)); -- -- if (c_rarg0 != dst) { -- __ mv(c_rarg0, dst); -- } -- -- __ mv(c_rarg1, t0); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- // Make sure dst has the return value. -- if (dst != x10) { -- __ mv(dst, x10); -- } -- -- __ pop_call_clobbered_registers_except(RegSet::of(dst)); -- __ leave(); -- -- __ bind(done); --} -- --#ifdef ASSERT -- --void ZBarrierSetAssembler::store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2) { -- // Verify value -- if (is_reference_type(type)) { -- // Note that src could be noreg, which means we -- // are storing null and can skip verification. -- if (val != noreg) { -- Label done; -- -- // tmp1 and tmp2 are often set to noreg. -- RegSet savedRegs = RegSet::of(t0); -- __ push_reg(savedRegs, sp); -- -- __ ld(t0, address_bad_mask_from_thread(xthread)); -- __ andr(t0, val, t0); -- __ beqz(t0, done); -- __ stop("Verify oop store failed"); -- __ should_not_reach_here(); -- __ bind(done); -- __ pop_reg(savedRegs, sp); -- } -- } -- -- // Store value -- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); --} -- --#endif // ASSERT -- --void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs) { -- if (!is_oop) { -- // Barrier not needed -- return; -- } -- -- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); -- -- assert_different_registers(src, count, t0); -- -- __ push_reg(saved_regs, sp); -- -- if (count == c_rarg0 && src == c_rarg1) { -- // exactly backwards!! 
-- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- __ xorr(c_rarg1, c_rarg0, c_rarg1); -- __ xorr(c_rarg0, c_rarg0, c_rarg1); -- } else { -- __ mv(c_rarg0, src); -- __ mv(c_rarg1, count); -- } -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); -- -- __ pop_reg(saved_regs, sp); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); --} -- --void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath) { -- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); -- -- assert_different_registers(jni_env, robj, tmp); -- -- // Resolve jobject -- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); -- -- // Compute the offset of address bad mask from the field of jni_environment -- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - -- in_bytes(JavaThread::jni_environment_offset())); -- -- // Load the address bad mask -- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); -- -- // Check address bad mask -- __ andr(tmp, robj, tmp); -- __ bnez(tmp, slowpath); -- -- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); --} -- --#ifdef COMPILER2 -- --OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { -- if (!OptoReg::is_reg(opto_reg)) { -- return OptoReg::Bad; -- } -- -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_FloatRegister()) { -- return opto_reg & ~1; -- } -- -- return opto_reg; --} -- --#undef __ --#define __ _masm-> -- --class ZSaveLiveRegisters { --private: -- MacroAssembler* const _masm; -- RegSet _gp_regs; -- FloatRegSet _fp_regs; -- VectorRegSet _vp_regs; -- --public: -- void initialize(ZLoadBarrierStubC2* stub) { -- // Record registers that needs to be saved/restored -- RegMaskIterator rmi(stub->live()); -- while (rmi.has_next()) { -- const OptoReg::Name opto_reg = rmi.next(); -- if (OptoReg::is_reg(opto_reg)) { -- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); -- if (vm_reg->is_Register()) { -- _gp_regs += RegSet::of(vm_reg->as_Register()); -- } else if (vm_reg->is_FloatRegister()) { -- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); -- } else if (vm_reg->is_VectorRegister()) { -- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); -- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); -- } else { -- fatal("Unknown register type"); -- } -- } -- } -- -- // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated -- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); -- } -- -- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _gp_regs(), -- _fp_regs(), -- _vp_regs() { -- // Figure out what registers to save/restore -- initialize(stub); -- -- // Save registers -- __ push_reg(_gp_regs, sp); -- __ push_fp(_fp_regs, sp); -- __ push_vp(_vp_regs, sp); -- } -- -- ~ZSaveLiveRegisters() { -- // Restore registers -- __ pop_vp(_vp_regs, sp); -- __ pop_fp(_fp_regs, sp); -- __ pop_reg(_gp_regs, sp); -- } --}; -- --class ZSetupArguments { --private: -- MacroAssembler* const _masm; -- const Register _ref; -- const Address _ref_addr; -- --public: -- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : -- _masm(masm), -- _ref(stub->ref()), -- _ref_addr(stub->ref_addr()) { -- -- // Setup 
arguments -- if (_ref_addr.base() == noreg) { -- // No self healing -- if (_ref != c_rarg0) { -- __ mv(c_rarg0, _ref); -- } -- __ mv(c_rarg1, zr); -- } else { -- // Self healing -- if (_ref == c_rarg0) { -- // _ref is already at correct place -- __ la(c_rarg1, _ref_addr); -- } else if (_ref != c_rarg1) { -- // _ref is in wrong place, but not in c_rarg1, so fix it first -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, _ref); -- } else if (_ref_addr.base() != c_rarg0) { -- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); -- __ mv(c_rarg0, _ref); -- __ la(c_rarg1, _ref_addr); -- } else { -- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); -- if (_ref_addr.base() == c_rarg0) { -- __ mv(t1, c_rarg1); -- __ la(c_rarg1, _ref_addr); -- __ mv(c_rarg0, t1); -- } else { -- ShouldNotReachHere(); -- } -- } -- } -- } -- -- ~ZSetupArguments() { -- // Transfer result -- if (_ref != x10) { -- __ mv(_ref, x10); -- } -- } --}; -- --#undef __ --#define __ masm-> -- --void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -- BLOCK_COMMENT("ZLoadBarrierStubC2"); -- -- // Stub entry -- __ bind(*stub->entry()); -- -- { -- ZSaveLiveRegisters save_live_registers(masm, stub); -- ZSetupArguments setup_arguments(masm, stub); -- int32_t offset = 0; -- __ la_patchable(t0, stub->slow_path(), offset); -- __ jalr(x1, t0, offset); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ -- --#endif // COMPILER2 -- --#ifdef COMPILER1 --#undef __ --#define __ ce->masm()-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const { -- assert_different_registers(xthread, ref->as_register(), t1); -- __ ld(t1, address_bad_mask_from_thread(xthread)); -- __ andr(t1, t1, ref->as_register()); --} -- --void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const { -- // Stub entry -- __ bind(*stub->entry()); -- -- Register ref = stub->ref()->as_register(); -- Register ref_addr = noreg; -- Register tmp = noreg; -- -- if (stub->tmp()->is_valid()) { -- // Load address into tmp register -- ce->leal(stub->ref_addr(), stub->tmp()); -- ref_addr = tmp = stub->tmp()->as_pointer_register(); -- } else { -- // Address already in register -- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); -- } -- -- assert_different_registers(ref, ref_addr, noreg); -- -- // Save x10 unless it is the result or tmp register -- // Set up SP to accomodate parameters and maybe x10. 
-- if (ref != x10 && tmp != x10) { -- __ sub(sp, sp, 32); -- __ sd(x10, Address(sp, 16)); -- } else { -- __ sub(sp, sp, 16); -- } -- -- // Setup arguments and call runtime stub -- ce->store_parameter(ref_addr, 1); -- ce->store_parameter(ref, 0); -- -- __ far_call(stub->runtime_stub()); -- -- // Verify result -- __ verify_oop(x10, "Bad oop"); -- -- -- // Move result into place -- if (ref != x10) { -- __ mv(ref, x10); -- } -- -- // Restore x10 unless it is the result or tmp register -- if (ref != x10 && tmp != x10) { -- __ ld(x10, Address(sp, 16)); -- __ add(sp, sp, 32); -- } else { -- __ add(sp, sp, 16); -- } -- -- // Stub exit -- __ j(*stub->continuation()); --} -- --#undef __ --#define __ sasm-> -- --void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const { -- __ prologue("zgc_load_barrier stub", false); -- -- __ push_call_clobbered_registers_except(RegSet::of(x10)); -- -- // Setup arguments -- __ load_parameter(0, c_rarg0); -- __ load_parameter(1, c_rarg1); -- -- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); -- -- __ pop_call_clobbered_registers_except(RegSet::of(x10)); -- -- __ epilogue(); --} -- --#undef __ --#endif // COMPILER1 -diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -deleted file mode 100644 -index dc07ab635fe..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp -+++ /dev/null -@@ -1,101 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP --#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -- --#include "code/vmreg.hpp" --#include "oops/accessDecorators.hpp" --#ifdef COMPILER2 --#include "opto/optoreg.hpp" --#endif // COMPILER2 -- --#ifdef COMPILER1 --class LIR_Assembler; --class LIR_Opr; --class StubAssembler; --class ZLoadBarrierStubC1; --#endif // COMPILER1 -- --#ifdef COMPILER2 --class Node; --class ZLoadBarrierStubC2; --#endif // COMPILER2 -- --class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { --public: -- virtual void load_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Register dst, -- Address src, -- Register tmp1, -- Register tmp_thread); -- --#ifdef ASSERT -- virtual void store_at(MacroAssembler* masm, -- DecoratorSet decorators, -- BasicType type, -- Address dst, -- Register val, -- Register tmp1, -- Register tmp2); --#endif // ASSERT -- -- virtual void arraycopy_prologue(MacroAssembler* masm, -- DecoratorSet decorators, -- bool is_oop, -- Register src, -- Register dst, -- Register count, -- RegSet saved_regs); -- -- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, -- Register jni_env, -- Register robj, -- Register tmp, -- Label& slowpath); -- --#ifdef COMPILER1 -- void generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const; -- -- void generate_c1_load_barrier_stub(LIR_Assembler* ce, -- ZLoadBarrierStubC1* stub) const; -- -- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) const; --#endif // COMPILER1 -- --#ifdef COMPILER2 -- OptoReg::Name refine_register(const Node* node, -- OptoReg::Name opto_reg); -- -- void generate_c2_load_barrier_stub(MacroAssembler* masm, -- ZLoadBarrierStubC2* stub) const; --#endif // COMPILER2 --}; -- --#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -deleted file mode 100644 -index d14997790af..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp -+++ /dev/null -@@ -1,212 +0,0 @@ --/* -- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "gc/shared/gcLogPrecious.hpp" --#include "gc/shared/gc_globals.hpp" --#include "gc/z/zGlobals.hpp" --#include "runtime/globals.hpp" --#include "runtime/os.hpp" --#include "utilities/globalDefinitions.hpp" --#include "utilities/powerOfTwo.hpp" -- --#ifdef LINUX --#include --#endif // LINUX -- --// --// The heap can have three different layouts, depending on the max heap size. --// --// Address Space & Pointer Layout 1 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000014000000000 (20TB) --// | Remapped View | --// +--------------------------------+ 0x0000010000000000 (16TB) --// . . --// +--------------------------------+ 0x00000c0000000000 (12TB) --// | Marked1 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// | Marked0 View | --// +--------------------------------+ 0x0000040000000000 (4TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 6 5 2 1 0 --// +--------------------+----+-----------------------------------------------+ --// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| --// +--------------------+----+-----------------------------------------------+ --// | | | --// | | * 41-0 Object Offset (42-bits, 4TB address space) --// | | --// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) --// | 0010 = Marked1 (Address view 8-12TB) --// | 0100 = Remapped (Address view 16-20TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-46 Fixed (18-bits, always zero) --// --// --// Address Space & Pointer Layout 2 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000280000000000 (40TB) --// | Remapped View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// . . --// +--------------------------------+ 0x0000180000000000 (24TB) --// | Marked1 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// | Marked0 View | --// +--------------------------------+ 0x0000080000000000 (8TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 7 6 3 2 0 --// +------------------+-----+------------------------------------------------+ --// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| --// +-------------------+----+------------------------------------------------+ --// | | | --// | | * 42-0 Object Offset (43-bits, 8TB address space) --// | | --// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) --// | 0010 = Marked1 (Address view 16-24TB) --// | 0100 = Remapped (Address view 32-40TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-47 Fixed (17-bits, always zero) --// --// --// Address Space & Pointer Layout 3 --// -------------------------------- --// --// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) --// . . --// . . --// . . --// +--------------------------------+ 0x0000500000000000 (80TB) --// | Remapped View | --// +--------------------------------+ 0x0000400000000000 (64TB) --// . . 
--// +--------------------------------+ 0x0000300000000000 (48TB) --// | Marked1 View | --// +--------------------------------+ 0x0000200000000000 (32TB) --// | Marked0 View | --// +--------------------------------+ 0x0000100000000000 (16TB) --// . . --// +--------------------------------+ 0x0000000000000000 --// --// 6 4 4 4 4 --// 3 8 7 4 3 0 --// +------------------+----+-------------------------------------------------+ --// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| --// +------------------+----+-------------------------------------------------+ --// | | | --// | | * 43-0 Object Offset (44-bits, 16TB address space) --// | | --// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) --// | 0010 = Marked1 (Address view 32-48TB) --// | 0100 = Remapped (Address view 64-80TB) --// | 1000 = Finalizable (Address view N/A) --// | --// * 63-48 Fixed (16-bits, always zero) --// -- --// Default value if probing is not implemented for a certain platform: 128TB --static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; --// Minimum value returned, if probing fails: 64GB --static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; -- --static size_t probe_valid_max_address_bit() { --#ifdef LINUX -- size_t max_address_bit = 0; -- const size_t page_size = os::vm_page_size(); -- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { -- const uintptr_t base_addr = ((uintptr_t) 1U) << i; -- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { -- // msync suceeded, the address is valid, and maybe even already mapped. -- max_address_bit = i; -- break; -- } -- if (errno != ENOMEM) { -- // Some error occured. This should never happen, but msync -- // has some undefined behavior, hence ignore this bit. --#ifdef ASSERT -- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#else // ASSERT -- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); --#endif // ASSERT -- continue; -- } -- // Since msync failed with ENOMEM, the page might not be mapped. -- // Try to map it, to see if the address is valid. 
-- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- munmap(result_addr, page_size); -- } -- if ((uintptr_t) result_addr == base_addr) { -- // address is valid -- max_address_bit = i; -- break; -- } -- } -- if (max_address_bit == 0) { -- // probing failed, allocate a very high page and take that bit as the maximum -- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; -- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); -- if (result_addr != MAP_FAILED) { -- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; -- munmap(result_addr, page_size); -- } -- } -- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); -- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); --#else // LINUX -- return DEFAULT_MAX_ADDRESS_BIT; --#endif // LINUX --} -- --size_t ZPlatformAddressOffsetBits() { -- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; -- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; -- const size_t min_address_offset_bits = max_address_offset_bits - 2; -- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); -- const size_t address_offset_bits = log2i_exact(address_offset); -- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); --} -- --size_t ZPlatformAddressMetadataShift() { -- return ZPlatformAddressOffsetBits(); --} -diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -deleted file mode 100644 -index f20ecd9b073..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp -+++ /dev/null -@@ -1,36 +0,0 @@ --/* -- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP --#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -- --const size_t ZPlatformGranuleSizeShift = 21; // 2MB --const size_t ZPlatformHeapViews = 3; --const size_t ZPlatformCacheLineSize = 64; -- --size_t ZPlatformAddressOffsetBits(); --size_t ZPlatformAddressMetadataShift(); -- --#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -deleted file mode 100644 -index 6b6f87814a5..00000000000 ---- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad -+++ /dev/null -@@ -1,233 +0,0 @@ --// --// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// -- --source_hpp %{ -- --#include "gc/shared/gc_globals.hpp" --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- --source %{ -- --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { -- if (barrier_data == ZLoadBarrierElided) { -- return; -- } -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); -- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(tmp, tmp, ref); -- __ bnez(tmp, *stub->entry(), true /* far */); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); -- __ j(*stub->entry()); -- __ bind(*stub->continuation()); --} -- --%} -- --// Load Pointer --instruct zLoadP(iRegPNoSp dst, memory mem) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); -- effect(TEMP dst); -- -- ins_cost(4 * DEFAULT_COST); -- -- format %{ "ld $dst, $mem, #@zLoadP" %} -- -- ins_encode %{ -- const Address ref_addr (as_Register($mem$$base), $mem$$disp); -- __ ld($dst$$Register, ref_addr); -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" -- "mv $res, $res == $oldval" %} -- -- ins_encode %{ -- 
Label failed; -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ beqz($res$$Register, failed); -- __ mv(t0, $oldval$$Register); -- __ bind(failed); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); -- __ andr(t1, t1, t0); -- __ beqz(t1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -- true /* result_as_bool */); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(t0, t0, $res$$Register); -- __ beqz(t0, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, -- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} -- -- ins_encode %{ -- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} -- -- ins_encode %{ -- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); -- %} -- ins_pipe(pipe_serial); --%} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 86710295444..9d2cc4cf89f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { - return count; - } - --#ifdef COMPILER2 --int MacroAssembler::push_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = 0; i < count; i++) { -- sub(stack, stack, vector_size_in_bytes); -- vs1r_v(as_VectorRegister(regs[i]), stack); -- } -- -- return count * vector_size_in_bytes / wordSize; --} -- --int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { -- CompressibleRegion cr(this); -- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- -- // Scan bitset to accumulate register pairs -- unsigned char regs[32]; -- int count = 0; -- for (int reg = 31; reg >= 0; reg--) { -- if ((1U << 31) & bitset) { -- regs[count++] = reg; -- } -- bitset <<= 1; -- } -- -- for (int i = count - 1; i >= 0; i--) { -- vl1r_v(as_VectorRegister(regs[i]), stack); -- add(stack, stack, vector_size_in_bytes); -- } -- -- return count * vector_size_in_bytes / wordSize; --} --#endif // COMPILER2 -- - void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { - CompressibleRegion cr(this); - // Push integer registers x7, x10-x17, x28-x31. 
-diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 23e09475be1..b2f0455a1f1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { - void pop_reg(Register Rd); - int push_reg(unsigned int bitset, Register stack); - int pop_reg(unsigned int bitset, Register stack); -- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } -- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } --#ifdef COMPILER2 -- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } -- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } --#endif // COMPILER2 - - // Push and pop everything that might be clobbered by a native - // runtime call except t0 and t1. (They are always -@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { - int push_fp(unsigned int bitset, Register stack); - int pop_fp(unsigned int bitset, Register stack); - -- int push_vp(unsigned int bitset, Register stack); -- int pop_vp(unsigned int bitset, Register stack); -- - // vext - void vmnot_m(VectorRegister vd, VectorRegister vs); - void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b3fdd04db1b..b05edf7172c 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { - // make sure object is 'reasonable' - __ beqz(x10, exit); // if obj is NULL it is OK - --#if INCLUDE_ZGC -- if (UseZGC) { -- // Check if mask is good. 
-- // verifies that ZAddressBadMask & x10 == 0 -- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(c_rarg2, x10, c_rarg3); -- __ bnez(c_rarg2, error); -- } --#endif -- - // Check if the oop is in the right area of memory - __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); - __ andr(c_rarg2, x10, c_rarg3); - -From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 15:43:39 +0800 -Subject: [PATCH 003/140] Drop the C2 Vector part - ---- - make/hotspot/gensrc/GensrcAdlc.gmk | 1 - - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - - src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- - .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- - src/hotspot/cpu/riscv/register_riscv.cpp | 5 - - src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/riscv.ad | 476 +--- - src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- - 16 files changed, 41 insertions(+), 3167 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - -diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index 67f4c6f0574..51137b99db2 100644 ---- a/make/hotspot/gensrc/GensrcAdlc.gmk -+++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) - - ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) - AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ - $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ - ))) - endif -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -index 27770dc17aa..73f84a724ca 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe - - bind(Done); - } -- --void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -- Label loop; -- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; -- -- bind(loop); -- vsetvli(tmp1, cnt, sew, Assembler::m2); -- vlex_v(vr1, a1, sew); -- vlex_v(vr2, a2, sew); -- vmsne_vv(vrs, vr1, vr2); -- vfirst_m(tmp2, vrs); -- bgez(tmp2, DONE); -- sub(cnt, cnt, tmp1); -- if (!islatin) { -- slli(tmp1, tmp1, 1); // get byte counts -- } -- add(a1, a1, tmp1); -- add(a2, a2, tmp1); -- bnez(cnt, loop); -- -- mv(result, true); --} -- --void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- BLOCK_COMMENT("string_equals_v {"); -- -- mv(result, false); -- -- if (elem_size == 2) { -- srli(cnt, cnt, 1); -- } -- -- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- BLOCK_COMMENT("} string_equals_v"); --} -- --// used by C2 ClearArray patterns. --// base: Address of a buffer to be zeroed --// cnt: Count in HeapWords --// --// base, cnt, v0, v1 and t0 are clobbered. --void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { -- Label loop; -- -- // making zero words -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vxor_vv(v0, v0, v0); -- -- bind(loop); -- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -- vse64_v(v0, base); -- sub(cnt, cnt, t0); -- shadd(base, t0, base, t0, 3); -- bnez(cnt, loop); --} -- --void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -- Register cnt1, int elem_size) { -- Label DONE; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -- -- BLOCK_COMMENT("arrays_equals_v {"); -- -- // if (a1 == a2), return true -- mv(result, true); -- beq(a1, a2, DONE); -- -- mv(result, false); -- // if a1 == null or a2 == null, return false -- beqz(a1, DONE); -- beqz(a2, DONE); -- // if (a1.length != a2.length), return false -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt1, cnt2, DONE); -- -- la(a1, Address(a1, base_offset)); -- la(a2, Address(a2, base_offset)); -- -- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -- -- bind(DONE); -- -- BLOCK_COMMENT("} arrays_equals_v"); --} -- --void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -- Register result, Register tmp1, Register tmp2, int encForm) { -- Label DIFFERENCE, DONE, L, loop; -- bool encLL = encForm == StrIntrinsicNode::LL; -- bool encLU = encForm == StrIntrinsicNode::LU; -- bool encUL = encForm == StrIntrinsicNode::UL; -- -- bool str1_isL = encLL || encLU; -- bool str2_isL = encLL || encUL; -- -- int minCharsInWord = encLL ? wordSize : wordSize / 2; -- -- BLOCK_COMMENT("string_compare {"); -- -- // for Lating strings, 1 byte for 1 character -- // for UTF16 strings, 2 bytes for 1 character -- if (!str1_isL) -- sraiw(cnt1, cnt1, 1); -- if (!str2_isL) -- sraiw(cnt2, cnt2, 1); -- -- // if str1 == str2, return the difference -- // save the minimum of the string lengths in cnt2. -- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- if (str1_isL == str2_isL) { // LL or UU -- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -- j(DONE); -- } else { // LU or UL -- Register strL = encLU ? str1 : str2; -- Register strU = encLU ? str2 : str1; -- VectorRegister vstr1 = encLU ? 
v4 : v0; -- VectorRegister vstr2 = encLU ? v0 : v4; -- -- bind(loop); -- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -- vle8_v(vstr1, strL); -- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -- vzext_vf2(vstr2, vstr1); -- vle16_v(vstr1, strU); -- vmsne_vv(v0, vstr2, vstr1); -- vfirst_m(tmp2, v0); -- bgez(tmp2, DIFFERENCE); -- sub(cnt2, cnt2, tmp1); -- add(strL, strL, tmp1); -- shadd(strU, tmp1, strU, tmp1, 1); -- bnez(cnt2, loop); -- j(DONE); -- } -- bind(DIFFERENCE); -- slli(tmp1, tmp2, 1); -- add(str1, str1, str1_isL ? tmp2 : tmp1); -- add(str2, str2, str2_isL ? tmp2 : tmp1); -- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -- sub(result, tmp1, tmp2); -- -- bind(DONE); --} -- --void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -- Label loop; -- assert_different_registers(src, dst, len, tmp, t0); -- -- BLOCK_COMMENT("byte_array_inflate_v {"); -- bind(loop); -- vsetvli(tmp, len, Assembler::e8, Assembler::m2); -- vle8_v(v2, src); -- vsetvli(t0, len, Assembler::e16, Assembler::m4); -- vzext_vf2(v0, v2); -- vse16_v(v0, dst); -- sub(len, len, tmp); -- add(src, src, tmp); -- shadd(dst, tmp, dst, tmp, 1); -- bnez(len, loop); -- BLOCK_COMMENT("} byte_array_inflate_v"); --} -- --// Compress char[] array to byte[]. --// result: the array length if every element in array can be encoded; 0, otherwise. --void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label done; -- encode_iso_array_v(src, dst, len, result, tmp); -- beqz(len, done); -- mv(result, zr); -- bind(done); --} -- --// result: the number of elements had been encoded. --void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -- Label loop, DIFFERENCE, DONE; -- -- BLOCK_COMMENT("encode_iso_array_v {"); -- mv(result, 0); -- -- bind(loop); -- mv(tmp, 0xff); -- vsetvli(t0, len, Assembler::e16, Assembler::m2); -- vle16_v(v2, src); -- // if element > 0xff, stop -- vmsgtu_vx(v1, v2, tmp); -- vfirst_m(tmp, v1); -- vmsbf_m(v0, v1); -- // compress char to byte -- vsetvli(t0, len, Assembler::e8); -- vncvt_x_x_w(v1, v2, Assembler::v0_t); -- vse8_v(v1, dst, Assembler::v0_t); -- -- bgez(tmp, DIFFERENCE); -- add(result, result, t0); -- add(dst, dst, t0); -- sub(len, len, t0); -- shadd(src, t0, src, t0, 1); -- bnez(len, loop); -- j(DONE); -- -- bind(DIFFERENCE); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} encode_iso_array_v"); --} -- --void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { -- Label LOOP, SET_RESULT, DONE; -- -- BLOCK_COMMENT("count_positives_v {"); -- mv(result, zr); -- -- bind(LOOP); -- vsetvli(t0, len, Assembler::e8, Assembler::m4); -- vle8_v(v0, ary); -- vmslt_vx(v0, v0, zr); -- vfirst_m(tmp, v0); -- bgez(tmp, SET_RESULT); -- // if tmp == -1, all bytes are positive -- add(result, result, t0); -- -- sub(len, len, t0); -- add(ary, ary, t0); -- bnez(len, LOOP); -- j(DONE); -- -- // add remaining positive bytes count -- bind(SET_RESULT); -- add(result, result, tmp); -- -- bind(DONE); -- BLOCK_COMMENT("} count_positives_v"); --} -- --void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL) { -- mv(result, zr); -- -- Label loop, MATCH, DONE; -- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; -- bind(loop); -- vsetvli(tmp1, cnt1, sew, Assembler::m4); -- vlex_v(v0, str1, sew); -- vmseq_vx(v0, v0, ch); -- vfirst_m(tmp2, v0); -- bgez(tmp2, MATCH); // if equal, return index -- -- add(result, result, tmp1); -- sub(cnt1, cnt1, tmp1); -- if (!isL) slli(tmp1, tmp1, 1); -- add(str1, str1, tmp1); -- bnez(cnt1, loop); -- -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- add(result, result, tmp2); -- -- bind(DONE); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- -- is_min ? vfmin_vv(dst, src1, src2) -- : vfmax_vv(dst, src1, src2); -- -- vmfne_vv(v0, src1, src1); -- vfadd_vv(dst, src1, src1, Assembler::v0_t); -- vmfne_vv(v0, src2, src2); -- vfadd_vv(dst, src2, src2, Assembler::v0_t); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min) { -- assert_different_registers(src2, tmp1, tmp2); -- -- Label L_done, L_NaN; -- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); -- vfmv_s_f(tmp2, src1); -- -- is_min ? vfredmin_vs(tmp1, src2, tmp2) -- : vfredmax_vs(tmp1, src2, tmp2); -- -- fsflags(zr); -- // Checking NaNs -- vmflt_vf(tmp2, src2, src1); -- frflags(t0); -- bnez(t0, L_NaN); -- j(L_done); -- -- bind(L_NaN); -- vfmv_s_f(tmp2, src1); -- vfredsum_vs(tmp1, src2, tmp2); -- -- bind(L_done); -- vfmv_f_s(dst, tmp1); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -index c71df4c101b..90b6554af02 100644 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -@@ -28,13 +28,6 @@ - - // C2_MacroAssembler contains high-level macros for C2 - -- private: -- void element_compare(Register r1, Register r2, -- Register result, Register cnt, -- Register tmp1, Register tmp2, -- VectorRegister vr1, VectorRegister vr2, -- VectorRegister vrs, -- bool is_latin, Label& DONE); - public: - - void string_compare(Register str1, Register str2, -@@ -145,49 +138,4 @@ - FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); - -- // intrinsic methods implemented by rvv instructions -- void string_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void arrays_equals_v(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- void string_compare_v(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register result, -- Register tmp1, Register tmp2, -- int encForm); -- -- void clear_array_v(Register base, Register cnt); -- -- void byte_array_inflate_v(Register src, Register dst, -- Register len, Register tmp); -- -- void char_array_compress_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void encode_iso_array_v(Register src, Register dst, -- Register len, Register result, -- Register tmp); -- -- void count_positives_v(Register ary, Register len, -- Register result, Register tmp); -- -- void string_indexof_char_v(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- bool isL); -- -- void minmax_FD_v(VectorRegister dst, -- VectorRegister src1, 
VectorRegister src2, -- bool is_double, bool is_min); -- -- void reduce_minmax_FD_v(FloatRegister dst, -- FloatRegister src1, VectorRegister src2, -- VectorRegister tmp1, VectorRegister tmp2, -- bool is_double, bool is_min); -- - #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index cbfc0583883..845064d6cbc 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); - "Extend fence.i to fence.i + fence.") \ - product(bool, AvoidUnalignedAccesses, true, \ - "Avoid generating unaligned memory accesses") \ -- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ -- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ -- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ -- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ -- "Use RVV instructions for left/right shift of BigInteger") -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseRVB, false, "Use RVB instructions") \ -+ experimental(bool, UseRVC, false, "Use RVC instructions") - - #endif // CPU_RISCV_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9d2cc4cf89f..8b8d126f6c9 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { - pop_reg(0xffffffe2, sp); - } - --void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { -+void MacroAssembler::push_CPU_state() { - CompressibleRegion cr(this); - // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) - push_reg(0xffffffe0, sp); -@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) - for (int i = 0; i < 32; i++) { - fsd(as_FloatRegister(i), Address(sp, i * wordSize)); - } -- -- // vector registers -- if (save_vectors) { -- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- add(t0, sp, vector_size_in_bytes * i); -- vse64_v(as_VectorRegister(i), t0); -- } -- } - } - --void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+void MacroAssembler::pop_CPU_state() { - CompressibleRegion cr(this); -- // vector registers -- if (restore_vectors) { -- vsetvli(t0, x0, Assembler::e64, Assembler::m8); -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -- vle64_v(as_VectorRegister(i), sp); -- add(sp, sp, vector_size_in_bytes * 8); -- } -- } - - // float registers - for (int i = 0; i < 32; i++) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b2f0455a1f1..b43131514c1 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { - - void pusha(); - void popa(); -- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); -+ void push_CPU_state(); -+ void pop_CPU_state(); - - // if heap base register is used - reinit it with the correct value - 
void reinit_heapbase(); -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -index 23a75d20502..4c7fabd7240 100644 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp -@@ -31,16 +31,9 @@ - // false => size gets scaled to BytesPerLong, ok. - static const bool init_array_count_is_in_bytes = false; - -- // Whether this platform implements the scalable vector feature -- static const bool implements_scalable_vector = true; -- -- static const bool supports_scalable_vector() { -- return UseRVV; -- } -- -- // riscv supports misaligned vectors store/load. -+ // riscv doesn't support misaligned vectors store/load on JDK11. - static constexpr bool misaligned_vectors_ok() { -- return true; -+ return false; - } - - // Whether code generation need accurate ConvI2L types. -@@ -53,9 +46,6 @@ - // the cpu only look at the lower 5/6 bits anyway? - static const bool need_masked_shift_count = false; - -- // No support for generic vector operands. -- static const bool supports_generic_vector_operands = false; -- - static constexpr bool isSimpleConstant64(jlong value) { - // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. - // Probably always true, even if a temp register is required. -@@ -127,31 +117,6 @@ - // the relevant 32 bits. - static const bool int_in_long = true; - -- // Does the CPU supports vector variable shift instructions? -- static constexpr bool supports_vector_variable_shifts(void) { -- return false; -- } -- -- // Does the CPU supports vector variable rotate instructions? -- static constexpr bool supports_vector_variable_rotates(void) { -- return false; -- } -- -- // Does the CPU supports vector constant rotate instructions? -- static constexpr bool supports_vector_constant_rotates(int shift) { -- return false; -- } -- -- // Does the CPU supports vector unsigned comparison instructions? -- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { -- return false; -- } -- -- // Some microarchitectures have mask registers used on vectors -- static const bool has_predicated_vectors(void) { -- return false; -- } -- - // true means we have fast l2f convers - // false means that conversion is done by runtime call - static constexpr bool convL2FSupported(void) { -@@ -161,9 +126,4 @@ - // Implements a variant of EncodeISOArrayNode that encode ASCII only - static const bool supports_encode_ascii_array = false; - -- // Returns pre-selection estimated size of a vector operation. 
-- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { -- return 0; -- } -- - #endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index f8116e9df8c..96cf1996a83 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = - ConcreteRegisterImpl::max_gpr + - FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; - --const int ConcreteRegisterImpl::max_vpr = -- ConcreteRegisterImpl::max_fpr + -- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; -- -- - const char* RegisterImpl::name() const { - static const char *const names[number_of_registers] = { - "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index a9200cac647..d697751f55f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - // it's optoregs. - - number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) - }; - - // added to make it compile - static const int max_gpr; - static const int max_fpr; -- static const int max_vpr; - }; - - typedef AbstractRegSet RegSet; -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 588887e1d96..85593a942e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); - reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); - reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); - --// ---------------------------- --// Vector Registers --// ---------------------------- -- --// For RVV vector registers, we simply extend vector register size to 4 --// 'logical' slots. This is nominally 128 bits but it actually covers --// all possible 'physical' RVV vector register lengths from 128 ~ 1024 --// bits. The 'physical' RVV vector register length is detected during --// startup, so the register allocator is able to identify the correct --// number of bytes needed for an RVV spill/unspill. 
-- --reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); --reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); --reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); --reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -- --reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); --reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); --reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); --reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -- --reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); --reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); --reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); --reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -- --reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); --reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); --reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); --reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -- --reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); --reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); --reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); --reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -- --reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); --reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); --reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); --reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -- --reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); --reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); --reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); --reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -- --reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); --reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); --reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); --reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -- --reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); --reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); --reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); --reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -- --reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); --reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); --reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); --reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -- --reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); --reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); --reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); --reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -- --reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); --reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); --reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); --reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -- --reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); --reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); --reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); --reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -- --reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); --reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); --reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
--reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -- --reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); --reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); --reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); --reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -- --reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); --reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); --reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); --reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -- --reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); --reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); --reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); --reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -- --reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); --reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); --reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); --reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -- --reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); --reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); --reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); --reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -- --reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); --reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); --reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); --reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -- --reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); --reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); --reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); --reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -- --reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); --reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); --reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); --reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -- --reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); --reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); --reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); --reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -- --reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); --reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); --reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); --reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -- --reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); --reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); --reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); --reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -- --reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); --reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); --reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); --reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -- --reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); --reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); --reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); --reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -- --reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); --reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); --reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); --reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -- --reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); --reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); --reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); --reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -- --reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); --reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); --reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); --reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -- --reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); --reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); --reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); --reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -- --reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); --reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); --reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); --reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); -- - // ---------------------------- - // Special Registers - // ---------------------------- -@@ -495,42 +324,7 @@ alloc_class chunk1( - F27, F27_H, - ); - --alloc_class chunk2( -- V0, V0_H, V0_J, V0_K, -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K, --); -- --alloc_class chunk3(RFLAGS); -+alloc_class chunk2(RFLAGS); - - //----------Architecture Description Register Classes-------------------------- - // Several register classes are automatically defined based upon information in -@@ -826,41 +620,6 @@ reg_class double_reg( - F31, F31_H - ); - --// Class for all RVV vector registers --reg_class vectora_reg( -- V1, V1_H, V1_J, V1_K, -- V2, V2_H, V2_J, V2_K, -- V3, V3_H, V3_J, V3_K, -- V4, V4_H, V4_J, V4_K, -- V5, V5_H, V5_J, V5_K, -- V6, V6_H, V6_J, V6_K, -- V7, V7_H, V7_J, V7_K, -- V8, V8_H, V8_J, V8_K, -- V9, V9_H, V9_J, V9_K, -- V10, V10_H, V10_J, V10_K, -- V11, V11_H, V11_J, V11_K, -- V12, V12_H, V12_J, V12_K, -- V13, V13_H, V13_J, V13_K, -- V14, V14_H, V14_J, V14_K, -- V15, V15_H, V15_J, V15_K, -- V16, V16_H, V16_J, V16_K, -- V17, V17_H, V17_J, V17_K, -- V18, V18_H, V18_J, V18_K, -- V19, V19_H, V19_J, V19_K, -- V20, V20_H, V20_J, V20_K, -- V21, V21_H, V21_J, V21_K, -- V22, V22_H, V22_J, V22_K, -- V23, V23_H, V23_J, V23_K, -- V24, V24_H, V24_J, V24_K, -- V25, V25_H, V25_J, V25_K, -- V26, V26_H, V26_J, V26_K, -- V27, V27_H, V27_J, V27_K, -- V28, V28_H, V28_J, V28_K, -- V29, V29_H, V29_J, 
V29_K, -- V30, V30_H, V30_J, V30_K, -- V31, V31_H, V31_J, V31_K --); -- - // Class for 64 bit register f0 - reg_class f0_reg( - F0, F0_H -@@ -881,31 +640,6 @@ reg_class f3_reg( - F3, F3_H - ); - --// class for vector register v1 --reg_class v1_reg( -- V1, V1_H, V1_J, V1_K --); -- --// class for vector register v2 --reg_class v2_reg( -- V2, V2_H, V2_J, V2_K --); -- --// class for vector register v3 --reg_class v3_reg( -- V3, V3_H, V3_J, V3_K --); -- --// class for vector register v4 --reg_class v4_reg( -- V4, V4_H, V4_J, V4_K --); -- --// class for vector register v5 --reg_class v5_reg( -- V5, V5_H, V5_J, V5_K --); -- - // class for condition codes - reg_class reg_flags(RFLAGS); - %} -@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { - - // Figure out which register class each belongs in: rc_int, rc_float or - // rc_stack. --enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; -+enum RC { rc_bad, rc_int, rc_float, rc_stack }; - - static enum RC rc_class(OptoReg::Name reg) { - -@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { - return rc_float; - } - -- // we have 32 vector register * 4 halves -- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -- return rc_vector; -- } -- -- // Between vector regs & stack is the flags regs. -+ // Between float regs & stack is the flags regs. - assert(OptoReg::is_stack(reg), "blow up if spilling flags"); - - return rc_stack; -@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int src_offset = ra_->reg2offset(src_lo); - int dst_offset = ra_->reg2offset(dst_lo); - -- if (bottom_type()->isa_vect() != NULL) { -- uint ireg = ideal_reg(); -- if (ireg == Op_VecA && cbuf) { -- C2_MacroAssembler _masm(cbuf); -- Assembler::CompressibleRegion cr(&_masm); -- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -- // stack to stack -- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -- vector_reg_size_in_bytes); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -- // vpr to stack -- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -- // stack to vpr -- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -- // vpr to vpr -- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -- } else { -- ShouldNotReachHere(); -- } -- } -- } else if (cbuf != NULL) { -+ if (cbuf != NULL) { - C2_MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { -@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - } else { - st->print("%s", Matcher::regName[dst_lo]); - } -- if (bottom_type()->isa_vect() != NULL) { -- int vsize = 0; -- if (ideal_reg() == Op_VecA) { -- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -- } else { -- ShouldNotReachHere(); -- } -- st->print("\t# vector spill size = %d", vsize); -- } else { -- st->print("\t# spill size = %d", is64 ? 64 : 32); -- } -+ st->print("\t# spill size = %d", is64 ? 
64 : 32); - } - - return 0; -@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - break; - -- case Op_StrCompressedCopy: // fall through -- case Op_StrInflatedCopy: // fall through -- case Op_CountPositives: -- return UseRVV; -- -- case Op_EncodeISOArray: -- return UseRVV && SpecialEncodeISOArray; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; -@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - // Identify extra cases that we might want to provide match rules for vector nodes and --// other intrinsics guarded with vector length (vlen) and element type (bt). --const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -- return false; -- } -- -- return op_vec_supported(opcode); --} -- --const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { -+// other intrinsics guarded with vector length (vlen). -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { - return false; - } - --const RegMask* Matcher::predicate_reg_mask(void) { -- return NULL; --} -- --const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { -- return NULL; --} -- --// Vector calling convention not yet implemented. --const bool Matcher::supports_vector_calling_convention(void) { -+const bool Matcher::has_predicated_vectors(void) { - return false; - } - --OptoRegPair Matcher::vector_return_value(uint ideal_reg) { -- Unimplemented(); -- return OptoRegPair(0, 0); --} -- - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then -@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { -- if (UseRVV) { -- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -- // MaxVectorSize == VM_Version::_initial_vector_length -- return MaxVectorSize; -- } - return 0; - } - -@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { - - // Vector ideal reg. - const uint Matcher::vector_ideal_reg(int len) { -- assert(MaxVectorSize >= len, ""); -- if (UseRVV) { -- return Op_VecA; -- } -- - ShouldNotReachHere(); - return 0; - } - --const int Matcher::scalable_vector_reg_size(const BasicType bt) { -- return Matcher::max_vector_size(bt); --} -- --MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { -- ShouldNotReachHere(); // generic vector operands not supported -- return NULL; --} -- --bool Matcher::is_reg2reg_move(MachNode* m) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- --bool Matcher::is_generic_vector(MachOper* opnd) { -- ShouldNotReachHere(); // generic vector operands not supported -- return false; --} -- - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -3384,67 +3020,6 @@ operand fRegD() - interface(REG_INTER); - %} - --// Generic vector class. This will be used for --// all vector operands. 
--operand vReg() --%{ -- constraint(ALLOC_IN_RC(vectora_reg)); -- match(VecA); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V1() --%{ -- constraint(ALLOC_IN_RC(v1_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V2() --%{ -- constraint(ALLOC_IN_RC(v2_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V3() --%{ -- constraint(ALLOC_IN_RC(v3_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V4() --%{ -- constraint(ALLOC_IN_RC(v4_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- --operand vReg_V5() --%{ -- constraint(ALLOC_IN_RC(v5_reg)); -- match(VecA); -- match(vReg); -- op_cost(0); -- format %{ %} -- interface(REG_INTER); --%} -- - // Java Thread Register - operand javaThread_RegP(iRegP reg) - %{ -@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) - ins_pipe(pipe_class_empty); - %} - --instruct castVV(vReg dst) --%{ -- match(Set dst (CastVV dst)); -- -- size(0); -- format %{ "# castVV of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - -@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su - instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R - instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ - iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, - rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); - effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - -@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ -- predicate(!UseRVV); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base); - -@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - - instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) - %{ -- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL base, KILL cr); - -@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg - instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, - iRegI_R10 result, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); - -@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, - iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, - iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) - %{ -- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); - -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -deleted file mode 100644 -index 3828e096b21..00000000000 ---- a/src/hotspot/cpu/riscv/riscv_v.ad -+++ /dev/null -@@ -1,2065 +0,0 @@ --// --// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. --// Copyright (c) 2020, Arm Limited. All rights reserved. --// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. --// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. --// --// This code is free software; you can redistribute it and/or modify it --// under the terms of the GNU General Public License version 2 only, as --// published by the Free Software Foundation. --// --// This code is distributed in the hope that it will be useful, but WITHOUT --// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or --// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --// version 2 for more details (a copy is included in the LICENSE file that --// accompanied this code). --// --// You should have received a copy of the GNU General Public License version --// 2 along with this work; if not, write to the Free Software Foundation, --// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. --// --// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA --// or visit www.oracle.com if you need additional information or have any --// questions. 
--// --// -- --// RISCV Vector Extension Architecture Description File -- --opclass vmemA(indirect); -- --source_hpp %{ -- bool op_vec_supported(int opcode); --%} -- --source %{ -- -- static void loadStore(C2_MacroAssembler masm, bool is_store, -- VectorRegister reg, BasicType bt, Register base) { -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- masm.vsetvli(t0, x0, sew); -- if (is_store) { -- masm.vsex_v(reg, base, sew); -- } else { -- masm.vlex_v(reg, base, sew); -- } -- } -- -- bool op_vec_supported(int opcode) { -- switch (opcode) { -- // No multiply reduction instructions -- case Op_MulReductionVD: -- case Op_MulReductionVF: -- case Op_MulReductionVI: -- case Op_MulReductionVL: -- // Others -- case Op_Extract: -- case Op_ExtractB: -- case Op_ExtractC: -- case Op_ExtractD: -- case Op_ExtractF: -- case Op_ExtractI: -- case Op_ExtractL: -- case Op_ExtractS: -- case Op_ExtractUB: -- // Vector API specific -- case Op_AndReductionV: -- case Op_OrReductionV: -- case Op_XorReductionV: -- case Op_LoadVectorGather: -- case Op_StoreVectorScatter: -- case Op_VectorBlend: -- case Op_VectorCast: -- case Op_VectorCastB2X: -- case Op_VectorCastD2X: -- case Op_VectorCastF2X: -- case Op_VectorCastI2X: -- case Op_VectorCastL2X: -- case Op_VectorCastS2X: -- case Op_VectorInsert: -- case Op_VectorLoadConst: -- case Op_VectorLoadMask: -- case Op_VectorLoadShuffle: -- case Op_VectorMaskCmp: -- case Op_VectorRearrange: -- case Op_VectorReinterpret: -- case Op_VectorStoreMask: -- case Op_VectorTest: -- return false; -- default: -- return UseRVV; -- } -- } -- --%} -- --definitions %{ -- int_def VEC_COST (200, 200); --%} -- --// All VEC instructions -- --// vector load/store --instruct loadV(vReg dst, vmemA mem) %{ -- match(Set dst (LoadVector mem)); -- ins_cost(VEC_COST); -- format %{ "vle $dst, $mem\t#@loadV" %} -- ins_encode %{ -- VectorRegister dst_reg = as_VectorRegister($dst$$reg); -- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, -- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct storeV(vReg src, vmemA mem) %{ -- match(Set mem (StoreVector mem src)); -- ins_cost(VEC_COST); -- format %{ "vse $src, $mem\t#@storeV" %} -- ins_encode %{ -- VectorRegister src_reg = as_VectorRegister($src$$reg); -- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, -- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector abs -- --instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVB src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVS src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVI src)); -- 
ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -- match(Set dst (AbsVL src)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -- "vmax.vv $dst, $tmp, $src" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsF(vReg dst, vReg src) %{ -- match(Set dst (AbsVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vabsD(vReg dst, vReg src) %{ -- match(Set dst (AbsVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add -- --instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AddVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst 
(AddVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfadd_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector and -- --instruct vand(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (AndV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vand_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector or -- --instruct vor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (OrV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector xor -- --instruct vxor(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (XorV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vxor_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float div -- --instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (DivVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfdiv_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max/min -- --instruct vmax(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmax_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmin(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && -- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} -- ins_encode %{ -- BasicType bt = Matcher::vector_element_basic_type(this); -- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -- __ vsetvli(t0, x0, sew); -- __ vmin_vv(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float-point max/min -- --instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminF(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vminD(vReg dst, vReg src1, vReg src2) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinV src1 src2)); -- effect(TEMP_DEF dst); -- ins_cost(VEC_COST); -- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} -- ins_encode %{ -- __ minmax_FD_v(as_VectorRegister($dst$$reg), -- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fmls -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + src2 * -src3 --instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + -src2 * src3 --// dst_src1 = dst_src1 + 
src2 * -src3 --instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmla -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + -src2 * src3 --// dst_src1 = -dst_src1 + src2 * -src3 --instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -- ins_cost(VEC_COST); -- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fnmls -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = -dst_src1 + src2 * src3 --instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -- predicate(UseFMA); -- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mla -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 + src2 * src3 --instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mls -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// dst_src1 = dst_src1 - src2 * src3 --instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -- ins_cost(VEC_COST); -- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector mul -- --instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- 
as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (MulVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector fneg -- --instruct vnegF(vReg dst, vReg src) %{ -- match(Set dst (NegVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vnegD(vReg dst, vReg src) %{ -- match(Set dst (NegVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// popcount vector -- --instruct vpopcountI(iRegINoSp dst, vReg src) %{ -- match(Set dst (PopCountVI src)); -- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector add reduction -- --instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (AddReductionVI src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (AddReductionVL src1 src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -- "vredsum.vs $tmp, $src2, $tmp\n\t" -- "vmv.x.s $dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVF src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -- match(Set src1_dst (AddReductionVD src1_dst src2)); -- effect(TEMP tmp); -- ins_cost(VEC_COST); -- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -- "vfredosum.vs $tmp, $src2, $tmp\n\t" -- "vfmv.f.s $src1_dst, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp$$reg)); -- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer max reduction --instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector integer min reduction --instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); -- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- Label Ldone; -- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); -- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -- __ bind(Ldone); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP tmp); -- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); -- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float max reduction -- --instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MaxReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, false /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector float min reduction -- --instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- false /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ -- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (MinReductionV src1 src2)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); -- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} -- ins_encode %{ -- __ reduce_minmax_FD_v($dst$$FloatRegister, -- $src1$$FloatRegister, as_VectorRegister($src2$$reg), -- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), -- true /* is_double */, true /* is_min */); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector Math.rint, floor, ceil -- --instruct vroundD(vReg dst, vReg src, immI rmode) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); -- match(Set dst (RoundDoubleModeV src rmode)); -- format %{ "vroundD $dst, $src, $rmode" %} -- ins_encode %{ -- switch ($rmode$$constant) { -- case RoundDoubleModeNode::rmode_rint: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_floor: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- case RoundDoubleModeNode::rmode_ceil: -- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); -- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- break; -- default: -- ShouldNotReachHere(); -- break; -- } -- %} -- ins_pipe(pipe_slow); --%} -- --// vector replicate -- --instruct replicateB(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateB src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateS src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI(vReg dst, iRegIorL2I src) %{ -- match(Set dst (ReplicateI src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL(vReg dst, iRegL src) %{ -- match(Set dst (ReplicateL src)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateB_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateB con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateS_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateS con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateI_imm5(vReg dst, immI5 con) %{ -- match(Set dst (ReplicateI con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateL_imm5(vReg dst, immL5 con) %{ -- match(Set dst (ReplicateL con)); -- ins_cost(VEC_COST); -- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateF(vReg dst, fRegF src) %{ -- match(Set dst (ReplicateF src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct replicateD(vReg dst, fRegD src) %{ -- match(Set dst (ReplicateD src)); -- ins_cost(VEC_COST); -- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector shift -- --instruct vasrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerByte - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- BitsPerShort - 1, Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (RShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} 
-- ins_pipe(pipe_slow); --%} -- --instruct vlslB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVB src shift)); -- ins_cost(VEC_COST); -- effect( TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (LShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVB src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, v0, v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- // if shift > BitsPerByte - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVS src shift)); -- ins_cost(VEC_COST); -- effect(TEMP_DEF dst); -- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -- "vmnot.m v0, 
v0\n\t" -- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- // if shift > BitsPerShort - 1, clear the element -- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg), Assembler::v0_t); -- // otherwise, shift -- __ vmnot_m(v0, v0); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg), Assembler::v0_t); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVI src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- -- --instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -- match(Set dst (URShiftVL src shift)); -- ins_cost(VEC_COST); -- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($shift$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) con = BitsPerByte - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) con = BitsPerShort - 1; -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (RShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (RShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
-- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVB src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVS src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (URShiftVI src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (URShiftVL src (RShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- if (con == 0) { -- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVB src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e8); -- if (con >= BitsPerByte) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVS src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -- ins_encode %{ -- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e16); -- if (con >= BitsPerShort) { -- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -- as_VectorRegister($src$$reg)); -- return; -- } -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -- match(Set dst (LShiftVI src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ -- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); -- match(Set dst (LShiftVL src (LShiftCntV shift))); -- ins_cost(VEC_COST); -- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -- ins_encode %{ -- uint32_t con = (unsigned)$shift$$constant & 0x1f; -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -- match(Set dst (LShiftCntV cnt)); -- match(Set dst (RShiftCntV cnt)); -- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sqrt -- --instruct vsqrtF(vReg dst, vReg src) %{ -- match(Set dst (SqrtVF src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsqrtD(vReg dst, vReg src) %{ -- match(Set dst (SqrtVD src)); -- ins_cost(VEC_COST); -- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --// vector sub -- --instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVB src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e8); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVS src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e16); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVI src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVL src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVF src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e32); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -- match(Set dst (SubVD src1 src2)); -- ins_cost(VEC_COST); -- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} -- ins_encode %{ -- __ vsetvli(t0, x0, Assembler::e64); -- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -- as_VectorRegister($src2$$reg)); -- %} -- ins_pipe(pipe_slow); --%} -- --instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. -- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -- iRegI_R10 result, vReg_V1 v1, -- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) --%{ -- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (StrEquals (Binary str1 str2) cnt)); -- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_equals_v($str1$$Register, $str2$$Register, -- $result$$Register, $cnt$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 1); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) --%{ -- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result (AryEq ary1 ary2)); -- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); -- -- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} -- ins_encode %{ -- __ arrays_equals_v($ary1$$Register, $ary2$$Register, -- $result$$Register, $tmp$$Register, 2); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -- ins_encode %{ -- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UU); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LL); -- %} -- ins_pipe(pipe_class_memory); --%} -- --instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::UL); -- %} -- ins_pipe(pipe_class_memory); --%} --instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -- iRegP_R28 tmp1, iRegL_R29 tmp2) --%{ -- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); -- -- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} -- ins_encode %{ -- __ string_compare_v($str1$$Register, $str2$$Register, -- $cnt1$$Register, $cnt2$$Register, $result$$Register, -- $tmp1$$Register, $tmp2$$Register, -- StrIntrinsicNode::LU); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// fast byte[] to char[] inflation --instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set dummy (StrInflatedCopy src (Binary dst len))); -- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); -- -- format %{ "String Inflate $src,$dst" %} -- ins_encode %{ -- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -- %} -- ins_pipe(pipe_class_memory); --%} -- --// encode char[] to byte[] in ISO_8859_1 --instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (EncodeISOArray src (Binary dst len))); -- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "Encode array $src,$dst,$len -> $result" %} -- ins_encode %{ -- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_class_memory ); --%} -- --// fast char[] to byte[] compression --instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (StrCompressedCopy src (Binary dst len))); -- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); -- -- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} -- ins_encode %{ -- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -- $result$$Register, $tmp$$Register); -- %} -- ins_pipe( pipe_slow ); --%} -- --instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) --%{ -- predicate(UseRVV); -- match(Set result (CountPositives ary len)); -- effect(USE_KILL ary, USE_KILL len, TEMP tmp); -- -- format %{ "count positives byte[] $ary, $len -> $result" %} -- ins_encode %{ -- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- false /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) --%{ -- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); -- -- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} -- -- ins_encode %{ -- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- true /* isL */); -- %} -- -- ins_pipe(pipe_class_memory); --%} -- --// clearing of an array --instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) --%{ -- predicate(UseRVV); -- match(Set dummy (ClearArray cnt base)); -- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); -- -- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} -- -- ins_encode %{ -- __ clear_array_v($base$$Register, $cnt$$Register); -- %} -- -- ins_pipe(pipe_class_memory); --%} -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f85d4b25a76..4daed17df10 100644 ---- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { - }; - - class RegisterSaver { -- const bool _save_vectors; - public: -- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} -+ RegisterSaver() {} - ~RegisterSaver() {} - OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - void restore_live_registers(MacroAssembler* masm); -@@ -91,11 +90,7 @@ class RegisterSaver { - // Used by deoptimization when it is managing result register - // values on its own - // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -- // |---v0---|<---SP -- // |---v1---|save vectors only in generate_handler_blob -- // |-- .. --| -- // |---v31--|----- -- // |---f0---| -+ // |---f0---|<---SP - // |---f1---| - // | .. | - // |---f31--| -@@ -106,16 +101,8 @@ class RegisterSaver { - // |---x31--| - // |---fp---| - // |---ra---| -- int v0_offset_in_bytes(void) { return 0; } - int f0_offset_in_bytes(void) { -- int f0_offset = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -- BytesPerInt; -- } --#endif -- return f0_offset; -+ return 0; - } - int reserved_slot_offset_in_bytes(void) { - return f0_offset_in_bytes() + -@@ -142,15 +129,6 @@ class RegisterSaver { - }; - - OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -- int vector_size_in_bytes = 0; -- int vector_size_in_slots = 0; --#ifdef COMPILER2 -- if (_save_vectors) { -- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -- } --#endif -- - assert_cond(masm != NULL && total_frame_words != NULL); - int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); - // OopMap frame size is in compiler stack slots (jint's) not bytes or words -@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - int frame_size_in_words = frame_size_in_bytes / wordSize; - *total_frame_words = frame_size_in_words; - -- // Save Integer, Float and Vector registers. -+ // Save Integer and Float registers. - __ enter(); -- __ push_CPU_state(_save_vectors, vector_size_in_bytes); -+ __ push_CPU_state(); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. 
This -@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - int sp_offset_in_slots = 0; - int step_in_slots = 0; -- if (_save_vectors) { -- step_in_slots = vector_size_in_slots; -- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -- VectorRegister r = as_VectorRegister(i); -- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -- } -- } - - step_in_slots = FloatRegisterImpl::max_slots_per_register; - for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ - - void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - assert_cond(masm != NULL); --#ifdef COMPILER2 -- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); --#else -- __ pop_CPU_state(_save_vectors); --#endif -+ __ pop_CPU_state(); - __ leave(); - } - - // Is vector's size (in bytes) bigger than a size saved by default? --// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. - bool SharedRuntime::is_wide_vector(int size) { -- return UseRVV; -+ return false; - } - - // The java_calling_convention describes stack locations as ideal slots on -@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); - } - --int SharedRuntime::vector_calling_convention(VMRegPair *regs, -- uint num_bits, -- uint total_args_passed) { -- Unimplemented(); -- return 0; --} -- - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - VMRegPair *regs2, -@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { - OopMap* map = NULL; - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(masm != NULL && oop_maps != NULL); -- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); -+ RegisterSaver reg_saver; - - // ------------- - // This code enters when returning to a de-optimized nmethod. A return -@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - address call_pc = NULL; - int frame_size_in_words = -1; - bool cause_return = (poll_type == POLL_AT_RETURN); -- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); -+ RegisterSaver reg_saver; - - // Save Integer and Float registers. 
- map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); -@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - assert_cond(masm != NULL); - - int frame_size_in_words = -1; -- RegisterSaver reg_saver(false /* save_vectors */); -+ RegisterSaver reg_saver; - - OopMapSet *oop_maps = new OopMapSet(); - assert_cond(oop_maps != NULL); -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index b05edf7172c..39416441bdf 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { - - return entry; - } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerLeftShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = t1; -- -- __ beqz(numIter, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ addi(oldArrNext, oldArr, 4); -- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); -- __ vle32_v(v0, oldArr); -- __ vle32_v(v4, oldArrNext); -- __ vsll_vx(v0, v0, shiftCount); -- __ vsrl_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArr); -- __ sub(numIter, numIter, t0); -- __ shadd(oldArr, t0, oldArr, t1, 2); -- __ shadd(newArr, t0, newArr, t1, 2); -- __ bnez(numIter, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } -- -- // Arguments: -- // -- // Input: -- // c_rarg0 - newArr address -- // c_rarg1 - oldArr address -- // c_rarg2 - newIdx -- // c_rarg3 - shiftCount -- // c_rarg4 - numIter -- // -- address generate_bigIntegerRightShift() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); -- address entry = __ pc(); -- -- Label loop, exit; -- -- Register newArr = c_rarg0; -- Register oldArr = c_rarg1; -- Register newIdx = c_rarg2; -- Register shiftCount = c_rarg3; -- Register numIter = c_rarg4; -- Register idx = numIter; -- -- Register shiftRevCount = c_rarg5; -- Register oldArrNext = c_rarg6; -- Register newArrCur = t0; -- Register oldArrCur = t1; -- -- __ beqz(idx, exit); -- __ shadd(newArr, newIdx, newArr, t0, 2); -- -- __ li(shiftRevCount, 32); -- __ sub(shiftRevCount, shiftRevCount, shiftCount); -- -- __ bind(loop); -- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); -- __ sub(idx, idx, t0); -- __ shadd(oldArrNext, idx, oldArr, t1, 2); -- __ shadd(newArrCur, idx, newArr, t1, 2); -- __ addi(oldArrCur, oldArrNext, 4); -- __ vle32_v(v0, oldArrCur); -- __ vle32_v(v4, oldArrNext); -- __ vsrl_vx(v0, v0, shiftCount); -- __ vsll_vx(v4, v4, shiftRevCount); -- __ vor_vv(v0, v0, v4); -- __ vse32_v(v0, newArrCur); -- __ bnez(idx, loop); -- -- __ bind(exit); -- __ ret(); -- -- return entry; -- } - #endif - - #ifdef COMPILER2 -@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { - MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); - 
StubRoutines::_montgomerySquare = g.generate_square(); - } -- -- if (UseRVVForBigIntegerShiftIntrinsics) { -- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); -- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); -- } - #endif - - generate_compare_long_strings(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 768c7633ca6..2c15a834542 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { - FLAG_SET_DEFAULT(MaxVectorSize, 0); - } - -- if (!UseRVV) { -- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); -- } -- - if (UseRVV) { - if (FLAG_IS_DEFAULT(MaxVectorSize)) { - MaxVectorSize = _initial_vector_length; -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index aa7222dc64a..1f6eff96cba 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { - freg = freg->successor(); - } - -- VectorRegister vreg = ::as_VectorRegister(0); -- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -- } -- vreg = vreg->successor(); -- } -- - for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { -- regName[i] = "NON-GPR-FPR-VPR"; -+ regName[i] = "NON-GPR-FPR"; - } - } - -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -index 9e611b1f671..6f613a8f11a 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { - return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; - } - --inline bool is_VectorRegister() { -- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; --} -- - inline Register as_Register() { - assert(is_Register(), "must be"); - return ::as_Register(value() / RegisterImpl::max_slots_per_register); -@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { - FloatRegisterImpl::max_slots_per_register); - } - --inline VectorRegister as_VectorRegister() { -- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); -- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register); --} -- - inline bool is_concrete() { - assert(is_reg(), "must be"); -- if (is_VectorRegister()) { -- int base = value() - ConcreteRegisterImpl::max_fpr; -- return (base % VectorRegisterImpl::max_slots_per_register) == 0; -- } else { -- return is_even(value()); -- } -+ return is_even(value()); - } - - #endif // CPU_RISCV_VMREG_RISCV_HPP - -From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 27 Mar 2023 16:05:55 +0800 -Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: - ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint - code - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- - .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ - src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- - 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- - .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- - src/hotspot/cpu/riscv/riscv.ad | 14 ++---- - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - - 9 files changed, 45 insertions(+), 127 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index dcd0472c540..af7bd067f33 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -39,20 +39,6 @@ - - #define __ ce->masm()-> - --void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { -- __ bind(_entry); -- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); -- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- -- __ far_jump(RuntimeAddress(stub)); --} -- - void CounterOverflowStub::emit_code(LIR_Assembler* ce) { - __ bind(_entry); - Metadata *m = _method->as_constant_ptr()->as_metadata(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index bba3bd4709c..0e383a3c139 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { - return offset; - } - --void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { -+void LIR_Assembler::return_op(LIR_Opr result) { - assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); - - // Pop the stack before the safepoint code -@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { - __ reserved_stack_check(); - } - -- code_stub->set_safepoint_offset(__ offset()); -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); - __ ret(); - } - - int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); - guarantee(info != NULL, "Shouldn't be NULL"); -- __ get_polling_page(t0, relocInfo::poll_type); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); - add_debug_info_for_branch(info); // This isn't just debug info: - // it's the oop map -- __ read_polling_page(t0, 0, relocInfo::poll_type); -+ __ read_polling_page(t0, offset, relocInfo::poll_type); - return __ offset(); - } - -diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -deleted file mode 100644 -index a90d9fdc160..00000000000 ---- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp -+++ /dev/null -@@ -1,47 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
-- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "asm/macroAssembler.hpp" --#include "opto/compile.hpp" --#include "opto/node.hpp" --#include "opto/output.hpp" --#include "runtime/sharedRuntime.hpp" -- --#define __ masm. --void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { -- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, -- "polling page return stub not created yet"); -- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); -- RuntimeAddress callback_addr(stub); -- -- __ bind(entry->_stub_label); -- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); -- __ la(t0, safepoint_pc.target()); -- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); -- __ far_jump(callback_addr); --} --#undef __ -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 6e38960598a..41e52a4d491 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -39,7 +39,6 @@ - #include "runtime/monitorChunk.hpp" - #include "runtime/os.inline.hpp" - #include "runtime/signature.hpp" --#include "runtime/stackWatermarkSet.hpp" - #include "runtime/stubCodeGenerator.hpp" - #include "runtime/stubRoutines.hpp" - #include "vmreg_riscv.inline.hpp" -@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { - } - - frame frame::sender(RegisterMap* map) const { -- frame result = sender_raw(map); -- -- if (map->process_frames()) { -- StackWatermarkSet::on_iteration(map->thread(), result); -- } -- -- return result; -+ return sender_raw(map); - } - - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index d12dcb2af19..9090ad0c058 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -- ld(t1, Address(xthread, JavaThread::polling_word_offset())); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t1, t1, SafepointMechanism::poll_bit()); - bnez(t1, safepoint); - } -@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( - // result check if synchronized method - 
Label unlocked, unlock, no_unlock; - -- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, -- // that would normally not be safe to use. Such bad returns into unsafe territory of -- // the stack, will call InterpreterRuntime::at_unwind. -- Label slow_path; -- Label fast_path; -- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); -- j(fast_path); -- -- bind(slow_path); -- push(state); -- set_last_Java_frame(esp, fp, (address)pc(), t0); -- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); -- reset_last_Java_frame(true); -- pop(state); -- -- bind(fast_path); -- - // get the value of _do_not_unlock_if_synchronized into x13 - const Address do_not_unlock_if_synchronized(xthread, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 8b8d126f6c9..4b6136ae36b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - } - - void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -- ld(t0, Address(xthread, JavaThread::polling_word_offset())); -- if (acquire) { -- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- } -- if (at_return) { -- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); - } else { -- andi(t0, t0, SafepointMechanism::poll_bit()); -- bnez(t0, slow_path, true /* is_far */); -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); - } - } - -@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { - } - - // Move the address of the polling page into dest. --void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { -- ld(dest, Address(xthread, JavaThread::polling_page_offset())); -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); -+ } - } - - // Read the polling page. The address of the polling page must - // already be in r. --address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -- address mark; -- { -- InstructionMark im(this); -- code_section()->relocate(inst_mark(), rtype); -- lwu(zr, Address(r, offset)); -- mark = inst_mark(); -- } -- verify_cross_modify_fence_not_required(); -- return mark; -+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); -+} -+ -+// Read the polling page. The address of the polling page must -+// already be in r. 
-+void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(dest, offset)); - } - - void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b43131514c1..041c696add6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -- void get_polling_page(Register dest, relocInfo::relocType rtype); -- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - - address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); - address ic_call(address entry, jint method_index = 0); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 85593a942e9..996fa1fb68f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - } - - if (do_polling() && C->is_method_compilation()) { -- st->print("# test polling word\n\t"); -- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); -- st->print("bgtu sp, t0, #slow_path"); -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); - } - } - #endif -@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - } - - if (do_polling() && C->is_method_compilation()) { -- Label dummy_label; -- Label* code_stub = &dummy_label; -- if (!C->output()->in_scratch_emit_size()) { -- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); -- } -- __ relocate(relocInfo::poll_return_type); -- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); - } - } - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -index 8e35530359a..7586af01d99 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { - // Initialization - static void initialize(); - -- constexpr static bool supports_stack_watermark_barrier() { return true; } -- - enum Feature_Flag { - #define CPU_FEATURE_FLAGS(decl) \ - decl(I, "i", 8) \ - -From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 15:42:09 +0800 -Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- - 2 files changed, 24 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 4b6136ae36b..269d76ba69e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { - sd(fp, Address(sp, framesize - 2 * wordSize)); - sd(ra, Address(sp, framesize - wordSize)); - if (PreserveFramePointer) { add(fp, sp, framesize); } -- verify_cross_modify_fence_not_required(); - } - - void MacroAssembler::remove_frame(int framesize) { -@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe - - void MacroAssembler::safepoint_ifence() { - ifence(); --#ifndef PRODUCT -- if (VerifyCrossModifyFence) { -- // Clear the thread state. -- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- } --#endif - } - --#ifndef PRODUCT --void MacroAssembler::verify_cross_modify_fence_not_required() { -- if (VerifyCrossModifyFence) { -- // Check if thread needs a cross modify fence. -- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); -- Label fence_not_required; -- beqz(t0, fence_not_required); -- // If it does then fail. -- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); -- mv(c_rarg0, xthread); -- jalr(t0); -- bind(fence_not_required); -- } --} --#endif -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 041c696add6..b59bdadb8bf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -- // Check the current thread doesn't need a cross modify fence. 
-- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; - }; - - #ifdef ASSERT - -From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:11:30 +0800 -Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on - primitive wrappers - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- - src/hotspot/cpu/riscv/riscv.ad | 7 ------- - 3 files changed, 21 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 6f656c8c533..348546a9ea0 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - - null_check_offset = offset(); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(hdr, obj); -- lwu(hdr, Address(hdr, Klass::access_flags_offset())); -- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(t0, slow_case, true /* is_far */); -- } -- - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 9090ad0c058..8adc7b1320d 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- load_klass(tmp, obj_reg); -- lwu(tmp, Address(tmp, Klass::access_flags_offset())); -- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); -- bnez(tmp, slow_case); -- } -- - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 996fa1fb68f..2eefc71dde0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1965,13 +1965,6 @@ encode %{ - // Load markWord from object into displaced_header. 
- __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -- if (DiagnoseSyncOnValueBasedClasses != 0) { -- __ load_klass(flag, oop); -- __ lwu(flag, Address(flag, Klass::access_flags_offset())); -- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); -- __ bnez(flag, cont, true /* is_far */); -- } -- - // Check for existing monitor - __ andi(t0, disp_hdr, markWord::monitor_value); - __ bnez(t0, object_has_monitor); - -From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:24:12 +0800 -Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler - directive 'BreakAtExecute' - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 348546a9ea0..e5ed25616d6 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { - } - - --void C1_MacroAssembler::verified_entry(bool breakAtEntry) { -+void C1_MacroAssembler::verified_entry() { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a J, JAL or NOP. - -From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:57:14 +0800 -Subject: [PATCH 008/140] Revert: JDK-8234562: Move - OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize - parameter order in Atomic - store JDK-8234737: Harmonize parameter order in - Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg - JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add - Atomic::fetch_and_add - ---- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ - .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- - 2 files changed, 39 insertions(+), 43 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -index 761da5d743e..9b8b1a31774 100644 ---- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -33,25 +33,31 @@ - // Note that memory_order_conservative requires a full barrier after atomic stores. 
- // See https://patchwork.kernel.org/patch/3575821/ - -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ - template --struct Atomic::PlatformAdd { -- template -- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { -+struct Atomic::PlatformAdd -+ : Atomic::FetchAndAdd > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { - D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); - FULL_MEM_BARRIER; - return res; - } - -- template -- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { -- return add_and_fetch(dest, add_value, order) - add_value; -+ template -+ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { -+ return add_and_fetch(add_value, dest, order) - add_value; - } - }; - - template - template --inline T Atomic::PlatformXchg::operator()(T volatile* dest, -- T exchange_value, -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); -@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg::operator()(T volatile* dest, - // __attribute__((unused)) on dest is to get rid of spurious GCC warnings. - template - template --inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(byte_size == sizeof(T)); - T value = compare_value; -@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attri - - template<> - template --inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest __attribute__((unused)), - T compare_value, -- T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(4 == sizeof(T)); - if (order != memory_order_relaxed) { -@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__(( - return rv; - } - --template --struct Atomic::PlatformOrderedLoad --{ -- template -- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } --}; -- --template --struct Atomic::PlatformOrderedStore --{ -- template -- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } --}; -- - #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -index 1c33dc1e87f..5b5d35553f7 100644 ---- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); } - inline void OrderAccess::loadstore() { acquire(); } - inline void OrderAccess::storeload() { fence(); } - --#define FULL_MEM_BARRIER __sync_synchronize() --#define 
READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); --#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -- - inline void OrderAccess::acquire() { - READ_MEM_BARRIER; - } -@@ -53,11 +49,26 @@ inline void OrderAccess::fence() { - FULL_MEM_BARRIER; - } - --inline void OrderAccess::cross_modify_fence_impl() { -- asm volatile("fence.i" : : : "memory"); -- if (UseConservativeFence) { -- asm volatile("fence ir, ir" : : : "memory"); -- } --} -+ -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } -+}; - - #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP - -From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 21:07:42 +0800 -Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc - into a simpler mark word value carrier - ---- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++-- - .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++---------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 4 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e5ed25616d6..2d52343587e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // Load object header - ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked -- ori(hdr, hdr, markWord::unlocked_value); -+ ori(hdr, hdr, markOopDesc::unlocked_value); - // save unlocked object header into the displaced header location on the stack - sd(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. 
unlocked), and if so, store the -@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); - // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index d0ac6e52436..84e1205bc25 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - Label done; - __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); - __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -- __ andi(t2, tmp, markWord::lock_mask_in_place); -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); - __ bnez(t2, done); -- __ ori(tmp, tmp, markWord::marked_value); -+ __ ori(tmp, tmp, markOopDesc::marked_value); - __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 - __ bind(done); - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2eefc71dde0..44ab44dece1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1966,12 +1966,12 @@ encode %{ - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - - // Check for existing monitor -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { - // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markWord::unlocked_value); -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); - - // Initialize the box. (Must happen before we update the object mark!) - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -@@ -1993,7 +1993,7 @@ encode %{ - // Check if the owner is self by comparing the value in the - // markWord of object (disp_hdr) with the stack pointer. - __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); - // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, - // hence we can store 0 as the displaced header in the box, which indicates that it is a - // recursive lock. -@@ -2012,15 +2012,15 @@ encode %{ - // otherwise m->owner may contain a thread or a stack address. - // - // Try to CAS m->owner from NULL to current thread. 
-- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); - __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, - Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for -- // markWord::monitor_value so use markWord::unused_mark which has the -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markWord::unused_mark().value()); -+ __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - __ beqz(flag, cont); // CAS success means locking succeeded -@@ -2029,9 +2029,9 @@ encode %{ - - // Recursive lock case - __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); -+ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); - - __ bind(cont); - %} -@@ -2060,7 +2060,7 @@ encode %{ - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markWord::monitor_value); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - - if (!UseHeavyMonitors) { -@@ -2080,8 +2080,8 @@ encode %{ - - // Handle existing monitor. - __ bind(object_has_monitor); -- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - - Label notRecursive; -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index d2a301c6e74..4e388ac4eaa 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { - - // initialize object hader only. 
- __ bind(initialize_header); -- __ mv(t0, (intptr_t)markWord::prototype().value()); -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 16:24:36 +0800 -Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) - failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add - in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and - queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in - frame:safe_for_sender - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- - 1 file changed, 23 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 41e52a4d491..8e7babe2c61 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { - // Profiling/safepoint support - - bool frame::safe_for_sender(JavaThread *thread) { -- address addr_sp = (address)_sp; -- address addr_fp = (address)_fp; -+ address sp = (address)_sp; -+ address fp = (address)_fp; - address unextended_sp = (address)_unextended_sp; - - // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; -+ - // sp must be within the usable part of the stack (not in guards) -- if (!thread->is_in_usable_stack(addr_sp)) { -+ bool sp_safe = (sp < thread->stack_base()) && -+ (sp >= thread->stack_base() - usable_stack_size); -+ -+ -+ if (!sp_safe) { - return false; - } - -@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { - // So unextended sp must be within the stack but we need not to check - // that unextended sp >= sp - -- if (!thread->is_in_full_stack_checked(unextended_sp)) { -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); -+ -+ if (!unextended_sp_safe) { - return false; - } - - // an fp must be within the stack and above (but not equal) sp - // second evaluation on fp+ is added to handle situation where fp is -1 -- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && -- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); -+ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); - - // We know sp/unextended_sp are safe only fp is questionable here - -@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { - - sender_sp = _unextended_sp + _cb->frame_size(); - // Is sender_sp safe? -- if (!thread->is_in_full_stack_checked((address)sender_sp)) { -+ if ((address)sender_sp >= thread->stack_base()) { - return false; - } - -@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { - // fp is always saved in a recognizable place in any code we generate. However - // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp - // is really a frame pointer. 
-- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - -@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { - - // Could be the call_stub - if (StubRoutines::returns_to_call_stub(sender_pc)) { -- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { - return false; - } - - -From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 31 Mar 2023 17:10:33 +0800 -Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify - StringLatin1.indexOf(char) - ---- - src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- - 1 file changed, 19 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 44ab44dece1..8c7a8ede815 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, - TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); - -@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, - %} - - --instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) --%{ -- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); -- -- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -- ins_encode %{ -- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -- $result$$Register, $tmp1$$Register, $tmp2$$Register, -- $tmp3$$Register, $tmp4$$Register, true /* isL */); -- %} -- ins_pipe(pipe_class_memory); --%} -- - // clearing of an array - instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) - %{ - -From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 12:25:36 +0800 -Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack - banging, and change the register t1->t0 - ---- - .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- - 1 file changed, 8 insertions(+), 34 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 6537b2dbd94..76ae6f89e27 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract - } - - void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -- // See more discussion in stackOverflow.hpp. 
-- -- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. -+ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; - const int page_size = os::vm_page_size(); -- const int n_shadow_pages = shadow_zone_size / page_size; -- --#ifdef ASSERT -- Label L_good_limit; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bnez(t0, L_good_limit); -- __ stop("shadow zone safe limit is not initialized"); -- __ bind(L_good_limit); -- -- Label L_good_watermark; -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bnez(t0, L_good_watermark); -- __ stop("shadow zone growth watermark is not initialized"); -- __ bind(L_good_watermark); --#endif -- -- Label L_done; -- -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- __ bgtu(sp, t0, L_done); -- -- for (int p = 1; p <= n_shadow_pages; p++) { -- __ bang_stack_with_offset(p * page_size); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t0, sp, pages * page_size); -+ __ sd(zr, Address(t0)); - } -- -- // Record the new watermark, but only if the update is above the safe limit. -- // Otherwise, the next time around the check above would pass the safe limit. -- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); -- __ bleu(sp, t0, L_done); -- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); -- -- __ bind(L_done); - } - - // Interpreter stub for calling a native method. (asm interpreter) - -From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 14:56:25 +0800 -Subject: [PATCH 013/140] Port aarch64 style sig handler from - os_linux_aarch64.cpp - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- - 1 file changed, 168 insertions(+), 56 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 1f46bbab0a2..db15f1946e2 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -48,7 +48,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "runtime/timer.hpp" --#include "signals_posix.hpp" - #include "utilities/debug.hpp" - #include "utilities/events.hpp" - #include "utilities/vmError.hpp" -@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { - } - - // Utility functions --bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, -- ucontext_t* uc, JavaThread* thread) { -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). 
When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. -+ -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } -+ -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; -+ } -+ } -+#endif -+ -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ){ -+ if(t->is_Java_thread()) { -+ thread = (JavaThread *) t; -+ } -+ else if(t->is_VM_thread()){ -+ vmthread = (VMThread *)t; -+ } -+ } -+ } -+ -+ // Handle SafeFetch faults -+ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (pc && StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } - - // decide if this trap can be handled by a stub - address stub = NULL; - -- address pc = NULL; -+ address pc = NULL; - - //%note os_trap_1 - if (info != NULL && uc != NULL && thread != NULL) { -- pc = (address) os::Posix::ucontext_get_pc(uc); -- -- address addr = (address) info->si_addr; -- -- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. -- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { -- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); -- } -+ pc = (address) os::Linux::ucontext_get_pc(uc); - - // Handle ALL stack overflow variations here - if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; -+ - // check if fault address is within thread stack -- if (thread->is_in_full_stack(addr)) { -- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { -- return true; // continue -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. 
Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); -+ -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. -+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. expanding stack."); -+ } - } - } - } -@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); - } - stub = SharedRuntime::get_handle_wrong_method_stub(); -- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { - stub = SharedRuntime::get_poll_stub(pc); - } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { - // BugId 4454115: A read from a MappedByteBuffer can fault -@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - // Do not crash the VM in such a case. - CodeBlob* cb = CodeCache::find_blob_unsafe(pc); - CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); -- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { -+ if (nm != NULL && nm->has_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (is_unsafe_arraycopy) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } -- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { -- // Pull a pointer to the error message out of the instruction -- // stream. -- const uint64_t *detail_msg_ptr -- = (uint64_t*)(pc + NativeInstruction::instruction_size); -- const char *detail_msg = (const char *)*detail_msg_ptr; -- const char *msg = "stop"; -- if (TraceTraps) { -- tty->print_cr("trap: %s: (SIGILL)", msg); -- } -- -- // End life with a fatal error, message and detail message and the context. -- // Note: no need to do any post-processing here (e.g. 
signal chaining) -- va_list va_dummy; -- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); -- va_end(va_dummy); -- -- ShouldNotReachHere(); - } else if (sig == SIGFPE && -- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { - stub = - SharedRuntime:: - continuation_for_implicit_exception(thread, -@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, - SharedRuntime:: - IMPLICIT_DIVIDE_BY_ZERO); - } else if (sig == SIGSEGV && -- MacroAssembler::uses_implicit_null_check((void*)addr)) { -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { - // Determination of interpreter/vtable stub/compiled code null exception - stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); - } -- } else if ((thread->thread_state() == _thread_in_vm || -- thread->thread_state() == _thread_in_native) && -- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -- thread->doing_unsafe_access()) { -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { - address next_pc = pc + NativeCall::instruction_size; -- if (UnsafeCopyMemory::contains_pc(pc)) { -- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); -- } - stub = SharedRuntime::handle_unsafe_access(thread, next_pc); - } - - // jni_fast_GetField can trap at certain pc's if a GC kicks in - // and the heap gets shrunk before the field access. - if ((sig == SIGSEGV) || (sig == SIGBUS)) { -- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -- if (addr_slow != (address)-1) { -- stub = addr_slow; -+ address addr = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr != (address)-1) { -+ stub = addr; - } - } -+ -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. 
-+ os::block_on_serialize_page_trap(); -+ return true; -+ } - } - - if (stub != NULL) { - // save all thread context in case we need to restore it -- if (thread != NULL) { -- thread->set_saved_exception_pc(pc); -- } -+ if (thread != NULL) thread->set_saved_exception_pc(pc); - -- os::Posix::ucontext_set_pc(uc, stub); -+ os::Linux::ucontext_set_pc(uc, stub); - return true; - } - -- return false; // Mute compiler -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } -+ -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } -+ -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } -+ -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); -+ -+ VMError::report_and_die(t, sig, pc, info, ucVoid); -+ -+ ShouldNotReachHere(); -+ return true; // Mute compiler - } - - void os::Linux::init_thread_fpu_state(void) { - -From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:24:57 +0800 -Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and - fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- - 2 files changed, 33 insertions(+), 14 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index db15f1946e2..4f1c84c60a0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -37,6 +37,7 @@ - #include "prims/jniFastGetField.hpp" - #include "prims/jvm_misc.hpp" - #include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/java.hpp" -@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { - return (char*) -1; - } - --address os::Posix::ucontext_get_pc(const ucontext_t * uc) { -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { - return (address)uc->uc_mcontext.__gregs[REG_PC]; - } - --void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { - uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; - } - -@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { - return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; - } - --address os::fetch_frame_from_context(const void* ucVoid, -- intptr_t** ret_sp, intptr_t** ret_fp) { -- address epc; -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); -+ -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} -+ -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ ExtendedPC epc; - const ucontext_t* uc = (const ucontext_t*)ucVoid; - - if (uc != NULL) { -- epc = os::Posix::ucontext_get_pc(uc); -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); - if (ret_sp != NULL) { - *ret_sp = os::Linux::ucontext_get_sp(uc); - } -@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, - *ret_fp = os::Linux::ucontext_get_fp(uc); - } - } else { -- epc = NULL; -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); - if (ret_sp != NULL) { - *ret_sp = (intptr_t *)NULL; - } -@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -- return frame(frame_sp, frame_fp, epc); -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); - } - - // By default, gcc always saves frame pointer rfp on this stack. This -@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { - // Note: it may be unsafe to inspect memory near pc. For example, pc may - // point to garbage if entry point in an nmethod is corrupted. Leave - // this at the end, and hope for the best. 
-- address pc = os::Posix::ucontext_get_pc(uc); -+ address pc = os::Linux::ucontext_get_pc(uc); - print_instructions(st, pc, sizeof(char)); - st->cr(); - } -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 3100572e9fd..e46efc420b0 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - - intptr_t* ret_fp = NULL; - intptr_t* ret_sp = NULL; -- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); -- if (addr == NULL || ret_sp == NULL ) { -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { - // ucontext wasn't useful - return false; - } - -- frame ret_frame(ret_sp, ret_fp, addr); -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 -- frame ret_frame2(ret_sp, NULL, addr); -+ frame ret_frame2(ret_sp, NULL, addr.pc()); - if (!ret_frame2.safe_for_sender(this)) { - // nothing else to try if the frame isn't good - return false; - -From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:33:50 +0800 -Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region - ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ - 2 files changed, 16 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 4daed17df10..21aa3b58c09 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { - return false; - } - -+size_t SharedRuntime::trampoline_size() { -+ return 6 * NativeInstruction::instruction_size; -+} -+ -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); -+ __ jalr(x0, t0, offset); -+} -+ - // The java_calling_convention describes stack locations as ideal slots on - // a frame with no abi restrictions. Since we must observe abi restrictions - // (like the placement of the register window) the slots must be biased by -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index e46efc420b0..31d9254d8ad 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) - return false; - } - -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. 
-+ return false; -+ } -+ - frame ret_frame(ret_sp, ret_fp, addr.pc()); - if (!ret_frame.safe_for_sender(this)) { - #ifdef COMPILER2 - -From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 15:52:43 +0800 -Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack - overflow handling code - ---- - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- - 1 file changed, 40 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -index 4f1c84c60a0..8b772892b4b 100644 ---- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, - return epc; - } - --frame os::fetch_compiled_frame_from_context(const void* ucVoid) { -- const ucontext_t* uc = (const ucontext_t*)ucVoid; -- // In compiled code, the stack banging is performed before RA -- // has been saved in the frame. RA is live, and SP and FP -- // belong to the caller. -- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -- - NativeInstruction::instruction_size); -- return frame(frame_sp, frame_fp, frame_pc); --} -- - frame os::fetch_frame_from_context(const void* ucVoid) { - intptr_t* frame_sp = NULL; - intptr_t* frame_fp = NULL; -@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { - return frame(frame_sp, frame_fp, epc.pc()); - } - -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. -+ intptr_t* fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* sp = os::Linux::ucontext_get_sp(uc); -+ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] -+ - NativeInstruction::instruction_size); -+ *fr = frame(sp, fp, pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } -+ } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; -+} -+ - // By default, gcc always saves frame pointer rfp on this stack. This - // may get turned off by -fomit-frame-pointer. 
- frame os::get_sender_for_C_frame(frame* fr) { - -From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:14:19 +0800 -Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and - VM_Version_ext for overlap and consolidation - ---- - .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- - 3 files changed, 142 insertions(+), 14 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 00000000000..6bdce51506e ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,87 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" -+ -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; -+ -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} -+ -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} -+ -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} -+ -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; -+} -+ -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} -+ -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 00000000000..711e4aeaf68 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+ -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+ -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; -+ -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; -+ -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); -+ -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); -+ -+}; -+ -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 2c15a834542..dd65f32277f 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { - } - } - #endif // COMPILER2 -- --void VM_Version::initialize_cpu_information(void) { -- // do nothing if cpu info has been initialized -- if (_initialized) { -- return; -- } -- -- _no_of_cores = os::processor_count(); -- _no_of_threads = _no_of_cores; -- _no_of_sockets = _no_of_cores; -- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -- _initialized = true; --} - -From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 17:16:05 +0800 -Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access - memory failures are not handled gracefully - ---- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- - 1 file changed, 2 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 39416441bdf..8392b768847 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, size); -- } -+ copy_memory(aligned, s, d, count, t0, size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { - __ push_reg(RegSet::of(d, count), sp); - } - -- { -- // UnsafeCopyMemory page error: continue after ucm -- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); -- UnsafeCopyMemoryMark ucmm(this, add_entry, true); -- copy_memory(aligned, s, d, count, t0, -size); -- } -+ copy_memory(aligned, s, d, count, t0, -size); - - if (is_oop) { - __ pop_reg(RegSet::of(d, count), sp); -@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { - ~StubGenerator() {} - }; // end class declaration - --#define UCM_TABLE_MAX_ENTRIES 8 - void StubGenerator_generate(CodeBuffer* code, bool all) { -- if (UnsafeCopyMemory::_table == NULL) { -- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); -- } -- - 
StubGenerator g(code, all); - } - -From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 15:55:09 +0800 -Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is - useless after JDK-8269122 - ---- - .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ - 1 file changed, 192 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 00000000000..583f67573ca ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,192 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); -+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); -+REGISTER_DEFINITION(VectorRegister, v14); 
-+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); -+ -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); -+ -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); -+ -+REGISTER_DEFINITION(Register, zr); -+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); -+ -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); - -From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:03 +0800 -Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 0e383a3c139..977563fe5f4 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { - } - 
} - -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ - void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { - __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, - Assembler::rl /* release */, t0, true /* result as bool */); - -From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:41:15 +0800 -Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value - methods - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ - 3 files changed, 27 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 4923962a496..44e8d4b4ff1 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3027,6 +3027,13 @@ enum Nf { - Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { - } - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); -+ } -+ - // Stack overflow checking - virtual void bang_stack_with_offset(int offset) { Unimplemented(); } - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 269d76ba69e..878957cbede 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, - void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} - void MacroAssembler::check_and_handle_popframe(Register java_thread) {} - -+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ intptr_t value = *delayed_value_addr; -+ if (value != 0) -+ return RegisterOrConstant(value + offset); -+ -+ // load indirectly to solve generation ordering problem -+ ld(tmp, ExternalAddress((address) delayed_value_addr)); -+ -+ if (offset != 0) -+ add(tmp, tmp, offset); -+ -+ return RegisterOrConstant(tmp); -+} -+ - // Calls to C land - // - // When entering C land, the fp, & esp of the last Java frame have to be recorded -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b59bdadb8bf..f23f7e7d1e6 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { - - void reserved_stack_check(); - -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset); -+ - void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); - void read_polling_page(Register r, address page, relocInfo::relocType rtype); - void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); - -From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:51:39 
+0800 -Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 977563fe5f4..a0ecc63d851 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { - add_call_info(code_offset(), op->info()); - } - -+/* Currently, vtable-dispatch is only enabled for sparc platforms */ -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::emit_static_call_stub() { - address call_pc = __ pc(); - assert((__ offset() % 4) == 0, "bad alignment"); - -From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 16:55:57 +0800 -Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should - not reference the head of inner strip mined loop. - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 8c7a8ede815..fcddf752564 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const - return align_up(current_offset, alignment_required()) - current_offset; - } - -+// Indicate if the safepoint node needs the polling page as an input -+ -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. -+ -+bool SafePointNode::needs_polling_address_input() -+{ -+ return true; -+} -+ - //============================================================================= - - #ifndef PRODUCT - -From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:26:29 +0800 -Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to - cpu-specific header files JDK-8254966: Remove unused code from Matcher - ---- - src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ - src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- - 2 files changed, 107 insertions(+), 130 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp -deleted file mode 100644 -index 4c7fabd7240..00000000000 ---- a/src/hotspot/cpu/riscv/matcher_riscv.hpp -+++ /dev/null -@@ -1,129 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. 
-- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef CPU_RISCV_MATCHER_RISCV_HPP --#define CPU_RISCV_MATCHER_RISCV_HPP -- -- // Defined within class Matcher -- -- // false => size gets scaled to BytesPerLong, ok. -- static const bool init_array_count_is_in_bytes = false; -- -- // riscv doesn't support misaligned vectors store/load on JDK11. -- static constexpr bool misaligned_vectors_ok() { -- return false; -- } -- -- // Whether code generation need accurate ConvI2L types. -- static const bool convi2l_type_required = false; -- -- // Does the CPU require late expand (see block.cpp for description of late expand)? -- static const bool require_postalloc_expand = false; -- -- // Do we need to mask the count passed to shift instructions or does -- // the cpu only look at the lower 5/6 bits anyway? -- static const bool need_masked_shift_count = false; -- -- static constexpr bool isSimpleConstant64(jlong value) { -- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -- // Probably always true, even if a temp register is required. -- return true; -- } -- -- // Use conditional move (CMOVL) -- static constexpr int long_cmove_cost() { -- // long cmoves are no more expensive than int cmoves -- return 0; -- } -- -- static constexpr int float_cmove_cost() { -- // float cmoves are no more expensive than int cmoves -- return 0; -- } -- -- // This affects two different things: -- // - how Decode nodes are matched -- // - how ImplicitNullCheck opportunities are recognized -- // If true, the matcher will try to remove all Decodes and match them -- // (as operands) into nodes. NullChecks are not prepared to deal with -- // Decodes by final_graph_reshaping(). -- // If false, final_graph_reshaping() forces the decode behind the Cmp -- // for a NullCheck. The matcher matches the Decode node into a register. -- // Implicit_null_check optimization moves the Decode along with the -- // memory operation back up before the NullCheck. -- static bool narrow_oop_use_complex_address() { -- return CompressedOops::shift() == 0; -- } -- -- static bool narrow_klass_use_complex_address() { -- return false; -- } -- -- static bool const_oop_prefer_decode() { -- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -- return CompressedOops::base() == NULL; -- } -- -- static bool const_klass_prefer_decode() { -- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -- return CompressedKlassPointers::base() == NULL; -- } -- -- // Is it better to copy float constants, or load them directly from -- // memory? Intel can load a float constant from a direct address, -- // requiring no extra registers. Most RISCs will have to materialize -- // an address into a register first, so they would do better to copy -- // the constant from stack. 
-- static const bool rematerialize_float_constants = false; -- -- // If CPU can load and store mis-aligned doubles directly then no -- // fixup is needed. Else we split the double into 2 integer pieces -- // and move it piece-by-piece. Only happens when passing doubles into -- // C code as the Java calling convention forces doubles to be aligned. -- static const bool misaligned_doubles_ok = true; -- -- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -- static const bool strict_fp_requires_explicit_rounding = false; -- -- // Are floats converted to double when stored to stack during -- // deoptimization? -- static constexpr bool float_in_double() { return false; } -- -- // Do ints take an entire long register or just half? -- // The relevant question is how the int is callee-saved: -- // the whole long is written but de-opt'ing will have to extract -- // the relevant 32 bits. -- static const bool int_in_long = true; -- -- // true means we have fast l2f convers -- // false means that conversion is done by runtime call -- static constexpr bool convL2FSupported(void) { -- return true; -- } -- -- // Implements a variant of EncodeISOArrayNode that encode ASCII only -- static const bool supports_encode_ascii_array = false; -- --#endif // CPU_RISCV_MATCHER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fcddf752564..a9e5f2e6841 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); - // Several register classes are automatically defined based upon information in - // this architecture description. - // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) --// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -+// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) - // - - // Class for all 32 bit general purpose registers -@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - return (-4096 <= offs && offs < 4096); - } - -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} -+ -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} -+ - // Vector width in bytes. - const int Matcher::vector_width_in_bytes(BasicType bt) { - return 0; -@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// RISC-V supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} -+ -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; -+ -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+// Does the CPU require late expand (see block.cpp for description of late expand)? 
-+const bool Matcher::require_postalloc_expand = false; -+ -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; -+ -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. -+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} -+ -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true -+ return false; -+} -+ -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; -+} -+ -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; -+} -+ -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. -+const bool Matcher::rematerialize_float_constants = false; -+ -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; -+ -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} -+ -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. -+const bool Matcher::strict_fp_requires_explicit_rounding = false; -+ -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } -+ -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; -+ - // Return whether or not this register is ever used as an argument. - // This function is used on startup to build the trampoline stubs in - // generateOptoStub. Registers not mentioned will be killed by the VM -@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { - return true; - } - -+const bool Matcher::convi2l_type_required = false; -+ - // Should the Matcher clone input 'm' of node 'n'? - bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { - assert_cond(m != NULL); -@@ -2250,6 +2353,9 @@ frame %{ - // Inline Cache Register or methodOop for I2C. - inline_cache_reg(R31); - -+ // Method Oop Register when calling interpreter. 
-+ interpreter_method_oop_reg(R31); -+ - // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] - cisc_spilling_operand_name(indOffset); - - -From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:30:42 +0800 -Subject: [PATCH 025/140] Revert JDK-8256238: Remove - Matcher::pass_original_key_for_aes - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a9e5f2e6841..0d1afd5584a 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { -+ return false; -+} -+ - // RISC-V supports misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return true; - -From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:42:37 +0800 -Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove - Matcher::vector_shift_count_ideal_reg() - ---- - src/hotspot/cpu/riscv/riscv.ad | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0d1afd5584a..c10e91633a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { - return 0; - } - -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ fatal("vector shift is not supported"); -+ return Node::NotAMachineReg; -+} -+ - // AES support not yet implemented - const bool Matcher::pass_original_key_for_aes() { - return false; - -From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:47:13 +0800 -Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c10e91633a5..2c5ec0451b8 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, - return clone_base_plus_offset_address(m, mstack, address_visited); - } - -+void Compile::reshape_address(AddPNode* addp) { -+} -+ - %} - - - -From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:49:09 +0800 -Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented - on any platform - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e7babe2c61..8e4f20fe561 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { - init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); - } - -+void frame::pd_ps() {} - #endif - - void JavaFrameAnchor::make_walkable(JavaThread* thread) { - -From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:56:27 +0800 -Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure - 
automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/riscv.ad | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2c5ec0451b8..a6aa52de29e 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { - return false; - } - -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 17:58:16 +0800 -Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused - Matcher::regnum_to_fpu_offset() - ---- - src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6aa52de29e..2d847cb6454 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { - return default_pressure_threshold; - } - -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ -+ Unimplemented(); -+ return 0; -+} -+ - // Is this branch offset short enough that a short branch can be used? - // - // NOTE: If the platform does not provide any short branch variants, then - -From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 18:03:23 +0800 -Subject: [PATCH 031/140] Revert JDK-8254084: Remove - TemplateTable::pd_initialize - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 4e388ac4eaa..c9d399ccdaf 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -48,6 +48,12 @@ - - #define __ _masm-> - -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No RISC-V specific initialization -+} -+ - // Address computation: local variables - - static inline Address iaddress(int n) { - -From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:17:07 +0800 -Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of - CollectedHeap::is_in_reserved() - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 878957cbede..cf01d7d74bb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - #ifdef ASSERT - { - ThreadInVMfromUnknown tiv; -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - oop_index = oop_recorder()->find_index(obj); -@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { - assert (UseCompressedOops, "should 
only be used for compressed oops"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); - } - #endif - int oop_index = oop_recorder()->find_index(obj); -@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - assert (UseCompressedClassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int index = oop_recorder()->find_index(k); -- assert(!Universe::heap()->is_in(k), "should not be an oop"); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); - - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - -From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 21:14:30 +0800 -Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit - should be a JRT_LEAF function - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 8adc7b1320d..48957803fdc 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); - - if (UseHeavyMonitors) { -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - } else { - Label done; - -@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - - // Call the runtime routine for slow case. 
- sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); - - bind(done); - - -From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 7 Apr 2023 12:51:33 +0800 -Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors - consistently && JDK-8279826: riscv: Preserve result in native wrapper with - +UseHeavyMonitors - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- - src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- - 3 files changed, 80 insertions(+), 105 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index a0ecc63d851..dd657963438 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { - if (method()->is_synchronized()) { - monitor_address(0, FrameMap::r10_opr); - stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -- if (UseHeavyMonitors) { -- __ j(*stub->entry()); -- } else { -- __ unlock_object(x15, x14, x10, *stub->entry()); -- } -+ __ unlock_object(x15, x14, x10, *stub->entry()); - __ bind(*stub->continuation()); - } - -@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - Register obj = op->obj_opr()->as_register(); // may not be an oop - Register hdr = op->hdr_opr()->as_register(); - Register lock = op->lock_opr()->as_register(); -- if (UseHeavyMonitors) { -+ if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2d847cb6454..29027d594a0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2109,40 +2109,36 @@ encode %{ - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Set tmp to be (markWord of object | UNLOCK_VALUE). -- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -- -- // Initialize the box. (Must happen before we update the object mark!) -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- -- // Compare object markWord with an unlocked value (tmp) and if -- // equal exchange the stack address of our box with object markWord. -- // On failure disp_hdr contains the possibly locked markWord. -- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/disp_hdr); -- __ mv(flag, zr); -- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -- -- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -- -- // If the compare-and-exchange succeeded, then we found an unlocked -- // object, will have now locked it will continue at label cont -- // We did not see an unlocked object so try the fast recursive case. -- -- // Check if the owner is self by comparing the value in the -- // markWord of object (disp_hdr) with the stack pointer. 
-- __ sub(disp_hdr, disp_hdr, sp); -- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -- // hence we can store 0 as the displaced header in the box, which indicates that it is a -- // recursive lock. -- __ andr(tmp/*==0?*/, disp_hdr, tmp); -- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -- __ mv(flag, tmp); // we can use the value of tmp as the result here -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path -- } -+ // Set tmp to be (markWord of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -+ -+ // Initialize the box. (Must happen before we update the object mark!) -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // Compare object markWord with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markWord. -+ // On failure disp_hdr contains the possibly locked markWord. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); -+ __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. -+ -+ // Check if the owner is self by comparing the value in the -+ // markWord of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here - - __ j(cont); - -@@ -2189,31 +2185,25 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -- if (!UseHeavyMonitors) { -- // Find the lock address and load the displaced header from the stack. -- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- // If the displaced header is 0, we have a recursive unlock. -- __ mv(flag, disp_hdr); -- __ beqz(disp_hdr, cont); -- } -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); - - // Handle existing monitor. - __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); - __ andi(t0, disp_hdr, markOopDesc::monitor_value); - __ bnez(t0, object_has_monitor); - -- if (!UseHeavyMonitors) { -- // Check if it is still a light weight lock, this is true if we -- // see the stack address of the basicLock in the markWord of the -- // object. -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markWord of the -+ // object. 
- -- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -- Assembler::rl, /*result*/tmp); -- __ xorr(flag, box, tmp); // box == tmp if cas succeeds -- } else { -- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path -- } -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds - __ j(cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 21aa3b58c09..5203200b068 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -- if (!UseHeavyMonitors) { -- // Load (object->mark() | 1) into swap_reg % x10 -- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -- __ ori(swap_reg, t0, 1); -- -- // Save (object->mark() | 1) into BasicLock's displaced header -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- -- // src -> dest if dest == x10 else x10 <- dest -- { -- Label here; -- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -- } -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); - -- // Test if the oopMark is an obvious stack pointer, i.e., -- // 1) (mark & 3) == 0, and -- // 2) sp <= mark < mark + os::pagesize() -- // These 3 tests can be done by evaluating the following -- // expression: ((mark - sp) & (3 - os::vm_page_size())), -- // assuming both stack pointer and pagesize have their -- // least significant 2 bits clear. -- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -- -- __ sub(swap_reg, swap_reg, sp); -- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -- -- // Save the test result, for recursive case, the result is zero -- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -- __ bnez(swap_reg, slow_path_lock); -- } else { -- __ j(slow_path_lock); -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); - } - -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. 
-+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ - // Slow path will re-enter here - __ bind(lock_done); - } -@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -- if (!UseHeavyMonitors) { -- // Simple recursive lock? -- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- __ beqz(t0, done); -- } -- -+ // Simple recursive lock? -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); - - // Must save x10 if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - -- if (!UseHeavyMonitors) { -- // get address of the stack lock -- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -- // get old displaced header -- __ ld(old_hdr, Address(x10, 0)); -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); - -- // Atomic swap old header if oop still contains the stack lock -- Label succeed; -- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -- __ bind(succeed); -- } else { -- __ j(slow_path_unlock); -- } -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); - - // slow path re-enters here - __ bind(unlock_done); - -From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 4 Apr 2023 16:49:19 +0800 -Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives - flag. CriticalJNINatives is unimplemented() even on AArch64. See - https://bugs.openjdk.org/browse/JDK-8254694. 
- -Also following up 8191129: AARCH64: Invalid value passed to critical JNI function ---- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- - .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5203200b068..f8585afbdc2 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - int compile_id, - BasicType* in_sig_bt, - VMRegPair* in_regs, -- BasicType ret_type) { -+ BasicType ret_type, -+ address critical_entry) { - if (method->is_method_handle_intrinsic()) { - vmIntrinsics::ID iid = method->intrinsic_id(); - intptr_t start = (intptr_t)__ pc(); -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index dd65f32277f..c0491d23fa6 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -147,6 +147,8 @@ void VM_Version::initialize() { - #ifdef COMPILER2 - c2_initialize(); - #endif // COMPILER2 -+ -+ UNSUPPORTED_OPTION(CriticalJNINatives); - } - - #ifdef COMPILER2 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d25..2c866f26f08 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f93113..1da369fde23 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; - -From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:04 +0800 -Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V - part) - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index dd657963438..46a20a64194 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C - return; - } - -+ assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); - Register dst = dest->as_register_lo(); - - -From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 12:15:44 +0800 -Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove - the Concurrent Mark Sweep (CMS) Garbage Collector - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ - src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 845064d6cbc..50bbb6a77b8 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); - - define_pd_global(bool, PreserveFramePointer, false); - -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread -+ - define_pd_global(uintx, TypeProfileLevel, 111); - - define_pd_global(bool, CompactStrings, true); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 29027d594a0..386ef731696 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); - // predicate controlling translation of CompareAndSwapX - bool needs_acquiring_load_reserved(const Node *load); - -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); -+ - // predicate controlling addressing modes - bool size_fits_all_mem_uses(AddPNode* addp, int shift); - %} -@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) - // so we can just return true here - return true; - } -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+ -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a dmb ishst between an object put and the -+ // associated card mark when we are using CMS without conditional -+ // card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; -+ } -+ -+ // a storestore is unnecesary in all other cases -+ -+ return true; -+} -+ - #define __ _masm. 
- - // advance declarations for helper functions to convert register -@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ - instruct storeimmCM0(immI0 zero, memory mem) - %{ - match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); - - ins_cost(STORE_COST); - format %{ "storestore (elided)\n\t" - -From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:06:58 +0800 -Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- - .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 6 files changed, 75 insertions(+), 6 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 48957803fdc..74dded77d19 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, - - Label safepoint; - address* const safepoint_table = Interpreter::safept_table(state); -- bool needs_thread_local_poll = generate_poll && table != safepoint_table; -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; - - if (needs_thread_local_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index cf01d7d74bb..73629e3dba3 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, - } - } - -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. 
-+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} -+ - void MacroAssembler::reset_last_Java_frame(bool clear_fp) { - // we must set sp to zero to clear frame - sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); -@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, - bind(L_failure); - } - --void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { -+void MacroAssembler::safepoint_poll(Label& slow_path) { - if (SafepointMechanism::uses_thread_local_poll()) { - ld(t1, Address(xthread, Thread::polling_page_offset())); - andi(t0, t1, SafepointMechanism::poll_bit()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index f23f7e7d1e6..8a2c6e07d88 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { - } - virtual ~MacroAssembler() {} - -- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); - - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 386ef731696..2dde4453dac 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { - return MachNode::pipeline_class(); - } - -+// This method seems to be obsolete. It is declared in machnode.hpp -+// and defined in all *.ad files, but it is never called. Should we -+// get rid of it? 
-+int MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; -+} -+ - //============================================================================= - - // Figure out which register class each belongs in: rc_int, rc_float or -@@ -1907,6 +1915,17 @@ encode %{ - __ li(dst_reg, 1); - %} - -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); -+ %} -+ - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ - C2_MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); -@@ -2688,6 +2707,17 @@ operand immP_1() - interface(CONST_INTER); - %} - -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ - // Card Table Byte Map Base - operand immByteMapBase() - %{ -@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) - ins_pipe(ialu_imm); - %} - -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} -+ -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); -+ -+ ins_pipe(ialu_imm); -+%} -+ - // Load Byte Map Base Constant - instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index f8585afbdc2..c501c8f7bac 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. - -- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); - __ bind(safepoint_in_progress_done); -@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t - __ bind(noException); - - Label no_adjust, bail; -- if (!cause_return) { -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { - // If our stashed return pc was modified by the runtime we avoid touching it - __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); - __ bne(x18, t0, no_adjust); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 76ae6f89e27..2d4baab2ab7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // - // This is to avoid a race when we're in a native->Java transition - // racing the code which wakes up from a safepoint. 
-- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); -+ __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - __ bind(L); - -From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 22:15:14 +0800 -Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: - Concurrent Thread-Stack Processing - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ - src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- - .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- - 5 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 8e4f20fe561..b056eb2488a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { - } - - //------------------------------------------------------------------------------ --// frame::sender_raw --frame frame::sender_raw(RegisterMap* map) const { -+// frame::sender -+frame frame::sender(RegisterMap* map) const { - // Default is we done have to follow them. The sender_for_xxx will - // update it accordingly - assert(map != NULL, "map must be set"); -@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { - return frame(sender_sp(), link(), sender_pc()); - } - --frame frame::sender(RegisterMap* map) const { -- return sender_raw(map); --} -- - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - assert(is_interpreted_frame(), "Not an interpreted frame"); - // These are reasonable sanity checks -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index c06aaa9e391..3b88f6d5a1a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -196,7 +196,4 @@ - - static jint interpreter_frame_expression_stack_direction() { return -1; } - -- // returns the sending frame, without applying any barriers -- frame sender_raw(RegisterMap* map) const; -- - #endif // CPU_RISCV_FRAME_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 74dded77d19..4e642af87c4 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { - - // remove activation - // --// Apply stack watermark barrier. - // Unlock the receiver if this is a synchronized method. - // Unlock any Java monitors from syncronized blocks. - // Remove the activation from the stack. -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index c501c8f7bac..d740c99c979 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - // check for safepoint operation in progress and/or pending suspend requests - { -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. 
We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. -- - __ safepoint_poll_acquire(safepoint_in_progress); - __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); - __ bnez(t0, safepoint_in_progress); -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 2d4baab2ab7..a07dea35b73 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // check for safepoint operation in progress and/or pending suspend requests - { - Label L, Continue; -- -- // We need an acquire here to ensure that any subsequent load of the -- // global SafepointSynchronize::_state flag is ordered after this load -- // of the thread-local polling word. We don't want this poll to -- // return false (i.e. not safepointing) and a later poll of the global -- // SafepointSynchronize::_state spuriously to return true. -- // -- // This is to avoid a race when we're in a native->Java transition -- // racing the code which wakes up from a safepoint. - __ safepoint_poll_acquire(L); - __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); - __ beqz(t1, Continue); - -From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 12:35:33 +0800 -Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2dde4453dac..9da8a76c190 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ - %} - - instruct sqrtF_reg(fRegF dst, fRegF src) %{ -- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); -+ match(Set dst (SqrtF src)); - - ins_cost(FSQRT_COST); - format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} - -From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 12 Apr 2023 15:11:49 +0800 -Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end - confusingly - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- - 1 file changed, 24 insertions(+), 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index f523c9ed50a..1f58bde4df5 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres - return call_RT(oop_result, metadata_result, entry, arg_num); - } - --enum return_state_t { -- does_not_return, requires_return --}; -- - // Implementation of StubFrame - - class StubFrame: public StackObj { - private: - StubAssembler* _sasm; -- bool _return_state; - - public: -- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); - void load_argument(int 
offset_in_words, Register reg); - - ~StubFrame(); -@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { - - #define __ _sasm-> - --StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { - _sasm = sasm; -- _return_state = return_state; - __ prologue(name, must_gc_arguments); - } - -@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { - - - StubFrame::~StubFrame() { -- if (_return_state == requires_return) { -- __ epilogue(); -- } else { -- __ should_not_reach_here(); -- } -+ __ epilogue(); - _sasm = NULL; - } - -@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe - assert_cond(oop_maps != NULL); - oop_maps->add_gc_map(call_offset, oop_map); - -+ __ should_not_reach_here(); - return oop_maps; - } - -@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - sasm->set_frame_size(frame_size); - break; - } -- default: ShouldNotReachHere(); -+ default: -+ __ should_not_reach_here(); -+ break; - } - - // verify that only x10 and x13 are valid at this time -@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { - restore_live_registers(sasm, id != handle_exception_nofpu_id); - break; - case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler - break; - default: ShouldNotReachHere(); - } -@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_div0_exception_id: - { -- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); - } - break; - - case throw_null_pointer_exception_id: -- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); - } - break; -@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_class_cast_exception_id: - { -- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); - } - break; - - case throw_incompatible_class_change_error_id: - { -- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, - CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); - } -@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case deoptimize_id: - { -- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); - OopMap* oop_map = save_live_registers(sasm); - assert_cond(oop_map != NULL); - f.load_argument(0, c_rarg1); -@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - 
- case throw_range_check_failed_id: - { -- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); - } - break; -@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case access_field_patching_id: - { -- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); - } -@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_klass_patching_id: - { -- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); - } -@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_mirror_patching_id: - { -- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); - } -@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case load_appendix_patching_id: - { -- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); - // we should set up register map - oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); - } -@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case throw_index_exception_id: - { -- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); - } - break; - - case throw_array_store_exception_id: - { -- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); - // tos + 0: link - // + 1: return address - oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); -@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - case predicate_failed_trap_id: - { -- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); - - OopMap* map = save_live_registers(sasm); - assert_cond(map != NULL); -@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - - default: - { -- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); - __ li(x10, (int) id); - __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); - __ should_not_reach_here(); - -From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: 
Wed, 12 Apr 2023 18:06:40 +0800 -Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of - VM exceptions in C1 deopt stub/traps - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ - 1 file changed, 65 insertions(+), 22 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f58bde4df5..1f45fba9de0 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { - #endif - __ reset_last_Java_frame(true); - --#ifdef ASSERT -- // Check that fields in JavaThread for exception oop and issuing pc are empty -- Label oop_empty; -- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -- __ beqz(t0, oop_empty); -- __ stop("exception oop must be empty"); -- __ bind(oop_empty); -+ // check for pending exceptions -+ { Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler - -- Label pc_empty; -- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -- __ beqz(t0, pc_empty); -- __ stop("exception pc must be empty"); -- __ bind(pc_empty); -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ -+#ifdef ASSERT -+ // Check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); -+ -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); - #endif - -- // Runtime will return true if the nmethod has been deoptimized, this is the -- // expected scenario and anything else is an error. Note that we maintain a -- // check on the result purely as a defensive measure. -- Label no_deopt; -- __ beqz(x10, no_deopt); // Have we deoptimized? -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); -+ -+ restore_live_registers(sasm); - -- // Perform a re-execute. The proper return address is already on the stack, -- // we just need to restore registers, pop all of our frames but the return -- // address and jump to the deopt blob. -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. 
-+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; -+ -+ __ beqz(x10, cont); // have we deoptimized? -+ -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob - - restore_live_registers(sasm); - __ leave(); - __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); - -- __ bind(no_deopt); -- __ stop("deopt not performed"); -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); - - return oop_maps; - } - -From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:29:27 +0800 -Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine - grained CompiledICLocker - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- - 2 files changed, 2 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 75bc4be7840..4d1687301fc 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - } - - void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); - // Reset stub. - address stub = static_stub->addr(); - assert(stub != NULL, "stub not found"); -- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); - // Creation also verifies the object. - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 0a05c577860..459683735e9 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -146,8 +146,7 @@ address NativeCall::destination() const { - // during code generation, where no patching lock is needed. - void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { - assert(!assert_lock || -- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || -- CompiledICLocker::is_safe(addr_at(0)), -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), - "concurrent code patching"); - - ResourceMark rm; - -From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:36:11 +0800 -Subject: [PATCH 044/140] Revert JDK-8225681: - vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe - modification of inline cache - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 4d1687301fc..0b13e44c8d6 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); --#ifdef ASSERT -+#ifndef PRODUCT - NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); - -- verify_mt_safe(callee, entry, method_holder, jump); -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); - #endif - // Update stub. - method_holder->set_data((intptr_t)callee()); - -From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:39:52 +0800 -Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after - JDK-8225681 - ---- - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- - 2 files changed, 4 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 0b13e44c8d6..1cfc92b28fa 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ - NativeMovConstReg* method_holder - = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); - method_holder->set_data(0); -- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -- jump->set_jump_destination((address)-1); - } - - //----------------------------------------------------------------------------- -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index 459683735e9..bfe84fa4e30 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) -- // i.e. jump to 0 when we need leave space for a wide immediate -- // load -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? (address) -1 : dest; - return dest; - }; - -@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { - - // We use jump to self as the unresolved address which the inline - // cache code (and relocs) know about -- // As a special case we also use jump to 0 when first generating -- // a general jump -- -- // return -1 if jump to self or to 0 -- if ((dest == (address) this) || dest == 0) { -- dest = (address) -1; -- } - -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; - return dest; - } - - -From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 18:47:08 +0800 -Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance - Print[Opto]Assembly output - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- - 2 files changed, 28 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 44e8d4b4ff1..b4e7287ce08 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { - - enum { instruction_size = 4 }; - -- //---< calculate length of instruction >--- -- // We just use the values set above. -- // instruction must start at passed address -- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } -- -- //---< longest instructions >--- -- static unsigned int instr_maxlen() { return instruction_size; } -- - enum RoundingMode { - rne = 0b000, // round to Nearest, ties to Even - rtz = 0b001, // round towards Zero -diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -index b0e5560c906..06bca5298cd 100644 ---- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { - return ""; - } - --// Returns address of n-th instruction preceding addr, --// NULL if no preceding instruction can be found. --// On riscv, we assume a constant instruction length. --// It might be beneficial to check "is_readable" as we do on ppc and s390. --static address find_prev_instr(address addr, int n_instr) { -- return addr - Assembler::instruction_size * n_instr; --} -- --// special-case instruction decoding. --// There may be cases where the binutils disassembler doesn't do --// the perfect job. In those cases, decode_instruction0 may kick in --// and do it right. --// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" --static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { -- return here; --} -- --// platform-specific instruction annotations (like value of loaded constants) --static void annotate(address pc, outputStream* st) {} -- - #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP - -From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 13 Apr 2023 19:44:28 +0800 -Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup - in frame::patch_pc - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index b056eb2488a..d03adc0bff4 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { - } - - void frame::patch_pc(Thread* thread, address pc) { -- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); - address* pc_addr = &(((address*) sp())[-1]); - if (TracePcPatching) { - tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", -@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { - // patch in the same address that's already there. 
- assert(_pc == *pc_addr || pc == *pc_addr, "must be"); - *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); - address original_pc = CompiledMethod::get_deopt_original_pc(this); - if (original_pc != NULL) { - assert(original_pc == _pc, "expected original PC to be stored before patching"); - -From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:10:06 +0800 -Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on - AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive - ObjectMonitor locking for C2 x64 and A64 - ---- - src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- - 1 file changed, 4 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 9da8a76c190..c0fbda4f3f9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2204,16 +2204,6 @@ encode %{ - __ mv(tmp, (address)markOopDesc::unused_mark()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -- __ beqz(flag, cont); // CAS success means locking succeeded -- -- __ bne(flag, xthread, cont); // Check for recursive locking -- -- // Recursive lock case -- __ mv(flag, zr); -- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- __ add(tmp, tmp, 1u); -- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); -- - __ bind(cont); - %} - -@@ -2257,18 +2247,12 @@ encode %{ - __ bind(object_has_monitor); - STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); - __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); - -- Label notRecursive; -- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. -- -- // Recursive lock -- __ addi(disp_hdr, disp_hdr, -1); -- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ mv(flag, zr); -- __ j(cont); -- -- __ bind(notRecursive); - __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); - __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); - __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. - -From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 21:13:21 +0800 -Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync - ---- - src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- - 1 file changed, 60 insertions(+), 40 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c0fbda4f3f9..c3ef648b21d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2150,9 +2150,17 @@ encode %{ - // Load markWord from object into displaced_header. - __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - -+ // Always do locking in runtime. 
-+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Check for existing monitor -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Set tmp to be (markWord of object | UNLOCK_VALUE). - __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); -@@ -2185,24 +2193,26 @@ encode %{ - __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); - __ mv(flag, tmp); // we can use the value of tmp as the result here - -- __ j(cont); -- -- // Handle existing monitor. -- __ bind(object_has_monitor); -- // The object's monitor m is unlocked iff m->owner == NULL, -- // otherwise m->owner may contain a thread or a stack address. -- // -- // Try to CAS m->owner from NULL to current thread. -- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -- -- // Store a non-null value into the box to avoid looking like a re-entrant -- // lock. The fast-path monitor unlock code checks for -- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -- __ mv(tmp, (address)markOopDesc::unused_mark()); -- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } - - __ bind(cont); - %} -@@ -2220,6 +2230,12 @@ encode %{ - - assert_different_registers(oop, box, tmp, disp_hdr, flag); - -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -2228,9 +2244,11 @@ encode %{ - __ beqz(disp_hdr, cont); - - // Handle existing monitor. 
-- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -- __ andi(t0, disp_hdr, markOopDesc::monitor_value); -- __ bnez(t0, object_has_monitor); -+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } - - // Check if it is still a light weight lock, this is true if we - // see the stack address of the basicLock in the markWord of the -@@ -2244,23 +2262,25 @@ encode %{ - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // Handle existing monitor. -- __ bind(object_has_monitor); -- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -- __ bnez(flag, cont); -- -- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -- __ bnez(flag, cont); -- // need a release store here -- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -- __ sd(zr, Address(tmp)); // set unowned -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); -+ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); -+ -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
-+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } - - __ bind(cont); - %} - -From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 16 Apr 2023 22:07:21 +0800 -Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- - .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ - src/hotspot/cpu/riscv/riscv.ad | 12 + - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- - 11 files changed, 341 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index b4e7287ce08..51aa052a0c7 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -3043,4 +3043,6 @@ enum Nf { - virtual ~Assembler() {} - }; - -+class BiasedLockingCounters; -+ - #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 46a20a64194..6a961ee2307 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - if (!UseFastLocking) { - __ j(*op->stub()->entry()); - } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } - assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); - // add debug info for NullPointerException only if one is possible -- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); - if (op->info() != NULL) { - add_debug_info_for_null_check(null_check_offset, op->info()); - } -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index e126f148cdf..c45a75b2301 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - - // "lock" stores the address of the monitor stack slot, so this is not an oop - LIR_Opr lock = new_register(T_INT); -+ // Need a scratch register for biased locking -+ LIR_Opr scratch = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ scratch = new_register(T_INT); -+ } - - CodeEmitInfo* info_for_exception = NULL; - if (x->needs_null_check()) { -@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { - // this CodeEmitInfo must not have the xhandlers because here the - // object is already locked (xhandlers expect object to be unlocked) - CodeEmitInfo* info = state_for(x, x->state(), true); -- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, -+ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); - } - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 2d52343587e..e486f41948e 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -35,6 +35,7 @@ - #include "oops/arrayOop.hpp" - #include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" -@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, - } - } - --int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { - const int aligned_mask = BytesPerWord - 1; - const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); -@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // save object being locked into the BasicObjectLock - sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - -- null_check_offset = offset(); -+ if (UseBiasedLocking) { -+ assert(scratch != noreg, "should have scratch register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); -+ } - - // Load object header - ld(hdr, Address(obj, hdr_offset)); -@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr - // otherwise we don't care about the result and handle locking via runtime call - bnez(hdr, slow_case, /* is_far */ true); - bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ add_memory_int32(Address(t1, 0), 1); -+ } - return null_check_offset; - } - -@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; - -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ - // load displaced header - ld(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - // if we had recursive locking, we are done - beqz(hdr, done); -- // load object -- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to -@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i - - void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { - assert_different_registers(obj, klass, len); -- // This assumes that all prototype bits fitr in an int32_t -- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking && !len->is_valid()) { -+ 
assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } - sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - - if (UseCompressedClassPointers) { // Take care not to kill klass -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -index dfd3c17d7c7..1950cee5dd5 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -59,8 +59,9 @@ using MacroAssembler::null_check; - // hdr : must be x10, contents destroyed - // obj : must point to the object to lock, contents preserved - // disp_hdr: must point to the displaced header location, contents preserved -+ // scratch : scratch register, contents destroyed - // returns code offset at which to add null check debug information -- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); - - // unlocking - // hdr : contents destroyed -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 4e642af87c4..f0c249f0d26 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -39,6 +39,7 @@ - #include "prims/jvmtiExport.hpp" - #include "prims/jvmtiThreadState.hpp" - #include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - // Load object pointer into obj_reg c_rarg3 - ld(obj_reg, Address(lock_reg, obj_offset)); - -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); -+ } -+ - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); -@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); - -- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ } - - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 7) == 0, and -@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) - - // Save the test result, for recursive case, the result is zero - sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ } - beqz(swap_reg, done); - - bind(slow_case); -@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) - // Free entry - sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - -+ if (UseBiasedLocking) { -+ 
biased_locking_exit(obj_reg, header_reg, done); -+ } -+ - // Load the old header from BasicLock structure - ld(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 73629e3dba3..e557a134b5b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -41,6 +41,7 @@ - #include "oops/compressedOops.inline.hpp" - #include "oops/klass.inline.hpp" - #include "oops/oop.hpp" -+#include "runtime/biasedLocking.hpp" - #include "runtime/interfaceSupport.inline.hpp" - #include "runtime/jniHandles.inline.hpp" - #include "runtime/sharedRuntime.hpp" -@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { - bind(no_reserved_zone_enabling); - } - -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) -+ counters = BiasedLocking::counters(); -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ li(t0, markOopDesc::biased_lock_pattern); -+ bne(t0, tmp_reg, cas_label); -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. 
We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. 
-+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. -+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ - // Move the address of the polling page into dest. - void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { - if (SafepointMechanism::uses_thread_local_poll()) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 8a2c6e07d88..c1ffa120774 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { - void safepoint_poll(Label& slow_path); - void safepoint_poll_acquire(Label& slow_path); - -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). 
If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); -+ -+ // Helper functions for statistics gathering. -+ // Unconditional atomic increment. -+ void atomic_incw(Register counter_addr, Register tmp); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ - // Place a fence.i after code may have been modified due to a safepoint. - void safepoint_ifence(); - -@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { - // stored using routines that take a jobject. - void store_heap_oop_null(Address dst); - -+ void load_prototype_header(Register dst, Register src); -+ - // This dummy is to prevent a call to store_heap_oop from - // converting a zero (linke NULL) into a Register by giving - // the compiler two choices it can't resolve -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c3ef648b21d..c2a0be140e9 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2156,6 +2156,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); -+ } -+ - // Check for existing monitor - if ((EmitSync & 0x02) == 0) { - __ andi(t0, disp_hdr, markOopDesc::monitor_value); -@@ -2236,6 +2240,10 @@ encode %{ - return; - } - -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ - // Find the lock address and load the displaced header from the stack. - __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - -@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla - ins_pipe(pipe_serial); - %} - -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. We -+// must use the acquire form of cmpxchg in order to guarantee acquire -+// semantics in this case. 
- instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) - %{ - match(Set cr (StoreLConditional mem (Binary oldval newval))); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index d740c99c979..eaefcc2b595 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // Load the oop from the handle - __ ld(obj_reg, Address(oop_handle_reg, 0)); - -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); -+ } -+ - // Load (object->mark() | 1) into swap_reg % x10 - __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ ori(swap_reg, t0, 1); -@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - - Label done; - -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); -+ } -+ - // Simple recursive lock? - __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ beqz(t0, done); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index c9d399ccdaf..1e23fb4dc09 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { - __ bnez(x13, loop); - } - -- // initialize object hader only. -+ // initialize object header only. - __ bind(initialize_header); -- __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last - -From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 17 Apr 2023 19:52:44 +0800 -Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI - field access event requests at runtime - ---- - .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- - 1 file changed, 6 insertions(+), 26 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -index 814ed23e471..f6e7351c4fc 100644 ---- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - // An even value means there are no ongoing safepoint operations - __ andi(t0, rcounter, 1); - __ bnez(t0, slow); -- -- if (JvmtiExport::can_post_field_access()) { -- // Using barrier to order wrt. JVMTI check and load of result. -- __ membar(MacroAssembler::LoadLoad); -- -- // Check to see if a field access watch has been set before we -- // take the fast path. -- int32_t offset2; -- __ la_patchable(result, -- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), -- offset2); -- __ lwu(result, Address(result, offset2)); -- __ bnez(result, slow); -- -- __ mv(robj, c_rarg1); -- } else { -- // Using address dependency to order wrt. load of result. 
-- __ xorr(robj, c_rarg1, rcounter); -- __ xorr(robj, robj, rcounter); // obj, since -- // robj ^ rcounter ^ rcounter == robj -- // robj is address dependent on rcounter. -- } -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. - - // Both robj and t0 are clobbered by try_resolve_jobject_in_native. - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - default: ShouldNotReachHere(); - } - -- // Using acquire: Order JVMTI check and load of result wrt. succeeding check -- // (LoadStore for volatile field). -- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -- -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); - __ lw(t0, safepoint_counter_addr); - __ bne(rcounter, t0, slow); - - -From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:34:32 +0800 -Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and - NullPointerException oops from Universe into OopStorage - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1e23fb4dc09..fbcdcf60d9c 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) - int32_t offset = 0; - __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); - __ ld(tmp, Address(rarg, offset)); -- __ resolve_oop_handle(tmp); - __ bne(result, tmp, notNull); - __ mv(result, zr); // NULL object reference - __ bind(notNull); - -From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 18 Apr 2023 16:58:23 +0800 -Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field - associated with the appendix field of an indy or method handle call - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index fbcdcf60d9c..158294f7436 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, - // since the parameter_size includes it. - __ push_reg(x9); - __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); - __ load_resolved_reference_at_index(index, x9); - __ pop_reg(x9); - __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
- -From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:04:31 +0800 -Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card - table members - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- - .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 1c46b3947d3..6b75bf63781 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - ExternalAddress cardtable((address) ct->byte_map_base()); - const Register card_addr = tmp; - -- __ srli(card_addr, store_addr, CardTable::card_shift()); -+ __ srli(card_addr, store_addr, CardTable::card_shift); - - // get the address of the card - __ load_byte_map_base(tmp2); -@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - assert_different_registers(card_offset, byte_map_base, t0); - - __ load_parameter(0, card_offset); -- __ srli(card_offset, card_offset, CardTable::card_shift()); -+ __ srli(card_offset, card_offset, CardTable::card_shift); - __ load_byte_map_base(byte_map_base); - - // Convert card offset into an address in card_addr -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a419f92b5f6..868d022ac74 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -- __ srli(obj, obj, CardTable::card_shift()); -+ __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); - -@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - __ shadd(end, count, start, count, LogBytesPerHeapOop); - __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive - -- __ srli(start, start, CardTable::card_shift()); -- __ srli(end, end, CardTable::card_shift()); -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); - __ sub(count, end, start); // number of bytes to copy - - __ load_byte_map_base(tmp); - -From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:30:58 +0800 -Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter - for CardTable - ---- - .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index 868d022ac74..a476e5ec84d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); - -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - __ srli(obj, obj, CardTable::card_shift); - - assert(CardTable::dirty_card_val() == 0, "must be"); -@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - __ sb(zr, Address(tmp)); - __ bind(L_already_dirty); - } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - __ sb(zr, Address(tmp)); - } - } -@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - assert_different_registers(start, tmp); - assert_different_registers(count, tmp); - -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ - Label L_loop, L_done; - const Register end = count; - -@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl - - __ load_byte_map_base(tmp); - __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } - - __ bind(L_loop); - __ add(tmp, start, count); - -From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:32:54 +0800 -Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable - -Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ - .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 3 +-- - 4 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index 6b75bf63781..b6786c6b327 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - // storing region crossing non-NULL, is card already dirty? 
- - ExternalAddress cardtable((address) ct->byte_map_base()); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - const Register card_addr = tmp; - - __ srli(card_addr, store_addr, CardTable::card_shift); -@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - Label done; - Label runtime; -diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -index a476e5ec84d..81d47d61d4c 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob - - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - __ srli(obj, obj, CardTable::card_shift); - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e557a134b5b..6e4d22db40f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { - } - - void MacroAssembler::load_byte_map_base(Register reg) { -- CardTable::CardValue* byte_map_base = -+ jbyte *byte_map_base = - ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); - li(reg, (uint64_t)byte_map_base); - } -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c2a0be140e9..ca6a232e1e0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2735,8 +2735,7 @@ operand immByteMapBase() - %{ - // Get base of card map - predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -- (CardTable::CardValue*)n->get_ptr() == -- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); - match(ConP); - - op_cost(0); - -From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 19 Apr 2023 11:51:20 +0800 -Subject: [PATCH 057/140] Revert JDK-8230486: - G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops - new_val - ---- - src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index b6786c6b327..d724876ec3a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - - __ bind(runtime); - // save the live input values -- RegSet saved = RegSet::of(store_addr); -+ RegSet saved = RegSet::of(store_addr, new_val); - __ push_reg(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); - __ 
pop_reg(saved, sp); - -From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Fri, 21 Apr 2023 12:10:22 +0800 -Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in - CompressedOops mode - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index ca6a232e1e0..e3f976faa0d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem) - instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) - %{ - match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); - - ins_cost(STORE_COST); - format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} - -From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 23 Apr 2023 14:59:09 +0800 -Subject: [PATCH 059/140] A fix for interpreter frame verification code, - skipping the locals check if there is no locals. See one of the additional - commits in JDK-8286301, the RISC-V loom port. - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index d03adc0bff4..13c482b610a 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const { - - // validate locals - address locals = (address) *interpreter_frame_locals_addr(); -- if (locals > thread->stack_base() || locals < (address) fp()) { -+ if (locals > thread->stack_base()) { -+ return false; -+ } -+ -+ if (m->max_locals() > 0 && locals < (address) fp()) { -+ // fp in interpreter frame on RISC-V is higher than that on AArch64, -+ // pointing to sender_sp and sender_sp-2 relatively. -+ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, -+ // pointing to sender_sp-1 (with one padding slot). -+ // So we verify the 'locals' pointer only if max_locals > 0. 
- return false; - } - - -From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 11 Apr 2023 11:45:40 +0800 -Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- - .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- - .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- - .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- - src/hotspot/cpu/riscv/riscv.ad | 6 +- - .../templateInterpreterGenerator_riscv.cpp | 15 +- - 7 files changed, 146 insertions(+), 213 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 6a961ee2307..90c4af5d3b0 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { - - - void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -- if (patch_code != lir_patch_none) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { - deoptimize_trap(info); - return; - } -+#endif - - assert(patch_code == lir_patch_none, "Patch code not supported"); - LIR_Address* adr = addr->as_address_ptr(); -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -index cd568cc723f..d19f5b859ce 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt - __ xchg(access.resolved_addr(), value_opr, result, tmp); - - if (access.is_oop()) { -- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); - LIR_Opr tmp_opr = gen->new_register(type); - __ move(result, tmp_opr); - result = tmp_opr; -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -index 84e1205bc25..b8534c52e77 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -27,7 +27,7 @@ - #include "gc/shenandoah/shenandoahBarrierSet.hpp" - #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" - #include "gc/shenandoah/shenandoahForwarding.hpp" --#include "gc/shenandoah/shenandoahHeap.inline.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" - #include "gc/shenandoah/shenandoahHeapRegion.hpp" - #include "gc/shenandoah/shenandoahRuntime.hpp" - #include "gc/shenandoah/shenandoahThreadLocalData.hpp" -@@ -44,6 +44,8 @@ - - #define __ masm-> - -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; -+ - void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, - Register src, Register dst, Register count, RegSet saved_regs) { - if (is_oop) { -@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, - Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? -- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { -+ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { - __ lwu(tmp, in_progress); - } else { -- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); -+ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbu(tmp, in_progress); - } - __ beqz(tmp, done); -@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb - __ pop_reg(saved_regs, sp); - } - --void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, -- Register dst, -- Address load_addr, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, -+ Address load_addr) { - assert(ShenandoahLoadRefBarrier, "Should be enabled"); - assert(dst != t1 && load_addr.base() != t1, "need t1"); - assert_different_registers(load_addr.base(), t0, t1); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- bool is_narrow = UseCompressedOops && !is_native; -- -- Label heap_stable, not_cset; -+ Label done; - __ enter(); - Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ lbu(t1, gc_state); - - // Check for heap stability -- if (is_strong) { -- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t1, heap_stable); -- } else { -- Label lrb; -- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); -- __ bnez(t0, lrb); -- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); -- __ beqz(t0, heap_stable); -- __ bind(lrb); -- } -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); - - // use x11 for load address - Register result_dst = dst; -@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, - __ la(x11, load_addr); - __ mv(x10, dst); - -- // Test for in-cset -- if (is_strong) { -- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(t1, t1, t0); -- __ lbu(t1, Address(t1)); -- __ andi(t0, t1, 1); -- __ beqz(t0, not_cset); -- } -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); - -- __ push_call_clobbered_registers(); -- if (is_strong) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } else if (is_weak) { -- if (is_narrow) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- } else { -- assert(is_phantom, "only remaining strength"); -- assert(!is_narrow, "phantom access cannot be narrow"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -- __ jalr(ra); -- __ mv(t0, x10); -- __ pop_call_clobbered_registers(); -- __ mv(x10, t0); -- __ bind(not_cset); - __ mv(result_dst, x10); - __ pop_reg(saved_regs, sp); - -- __ bind(heap_stable); -+ __ bind(done); - __ 
leave(); - } - -@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds - } - } - -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); -+ } -+} -+ - // - // Arguments: - // -@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, - - BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); - -- load_reference_barrier(masm, dst, src, decorators); -+ load_reference_barrier(masm, dst, src); - - if (dst != result_dst) { - __ mv(result_dst, dst); -@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); -@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); - __ bind(*stub->entry()); - -- DecoratorSet decorators = stub->decorators(); -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- - Register obj = stub->obj()->as_register(); - Register res = stub->result()->as_register(); - Register addr = stub->addr()->as_pointer_register(); -@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble - __ mv(res, obj); - } - -- if (is_strong) { -- // Check for object in cset. -- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -- __ add(tmp2, tmp2, tmp1); -- __ lbu(tmp2, Address(tmp2)); -- __ beqz(tmp2, *stub->continuation(), true /* is_far */); -- } -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); -+ -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); -+ -+ // Check if object is already forwarded. -+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); -+ -+ // Decode forwarded object. 
-+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); - -+ __ bind(slow_path); - ce->store_parameter(res, 0); - ce->store_parameter(addr, 1); -- -- if (is_strong) { -- if (is_native) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); -- } else { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); -- } -- } else if (is_weak) { -- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); -- } else { -- assert(is_phantom, "only remaining strength"); -- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); -- } -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); - - __ j(*stub->continuation()); - } -@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss - __ epilogue(); - } - --void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, -- DecoratorSet decorators) { -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { - __ prologue("shenandoah_load_reference_barrier", false); - // arg0 : object to be resolved - -@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - __ load_parameter(0, x10); - __ load_parameter(1, x11); - -- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); -- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); -- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); -- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -- if (is_strong) { -- if (is_native) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } else { -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); -- } -- } -- } else if (is_weak) { -- assert(!is_native, "weak must not be called off-heap"); -- if (UseCompressedOops) { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); -- } else { -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); -- } -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); - } else { -- assert(is_phantom, "only remaining strength"); -- assert(is_native, "phantom must only be called off-heap"); -- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); - } - __ jalr(ra); - __ mv(t0, x10); -@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s - #undef __ - - #endif // COMPILER1 -+ -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; -+} -+ -+#define __ cgen->assembler()-> -+ -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); -+ -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); -+ -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ __ push_call_clobbered_registers(); -+ -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); -+ } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); -+ -+ return start; -+} -+ -+#undef __ -+ -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -index a705f497667..5d75035e9d4 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -40,6 +40,8 @@ class StubCodeGenerator; - class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - private: - -+ static address _shenandoah_lrb; -+ - void satb_write_barrier_pre(MacroAssembler* masm, - Register obj, - Register pre_val, -@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - - void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); -+ -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); - - public: - -+ static address shenandoah_lrb(); -+ - void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); - - #ifdef COMPILER1 - void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); - void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); - void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); - #endif - - virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { - virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, - Register obj, Register tmp, Label& 
slowpath); - -- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); - }; - - #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -index 6c855f23c2a..bab407a8b76 100644 ---- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva - ins_pipe(pipe_slow); - %} - --instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP_DEF res, TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- true /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- - instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ - match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); - ins_cost(10 * DEFAULT_COST); -@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva - - ins_pipe(pipe_slow); - %} -- --instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -- predicate(needs_acquiring_load_reserved(n)); -- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); -- ins_cost(10 * DEFAULT_COST); -- -- effect(TEMP tmp, KILL cr); -- format %{ -- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" -- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" -- %} -- -- ins_encode %{ -- Register tmp = $tmp$$Register; -- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. -- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop -- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, -- Assembler::aq /* acquire */, Assembler::rl /* release */, -- false /* is_cae */, $res$$Register); -- %} -- -- ins_pipe(pipe_slow); --%} -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e3f976faa0d..a6061de7a33 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_CompareAndSwapL: - case Op_CompareAndSwapP: - case Op_CompareAndSwapN: -+#if INCLUDE_SHENANDOAHGC - case Op_ShenandoahCompareAndSwapP: - case Op_ShenandoahCompareAndSwapN: -+#endif - case Op_CompareAndSwapB: - case Op_CompareAndSwapS: - case Op_GetAndSetI: -@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) - case Op_WeakCompareAndSwapL: - case Op_WeakCompareAndSwapP: - case Op_WeakCompareAndSwapN: -- case Op_ShenandoahWeakCompareAndSwapP: -- case Op_ShenandoahWeakCompareAndSwapN: -- case Op_ShenandoahCompareAndExchangeP: -- case Op_ShenandoahCompareAndExchangeN: - return maybe_volatile; - default: - return false; -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a07dea35b73..5a87c687cf7 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); - - // Get mirror and store it in the frame as GC root for this Method* -- __ load_mirror(t2, xmethod); -- __ sd(zr, Address(sp, 5 * wordSize)); -- __ sd(t2, Address(sp, 4 * wordSize)); -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t2, xmethod); -+ __ sd(zr, Address(sp, 5 * wordSize)); -+ __ sd(t2, Address(sp, 4 * wordSize)); -+ } - - __ ld(xcpool, Address(xmethod, Method::const_offset())); - __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); - -From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 14:42:07 +0800 -Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and - unsigned long - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ - .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ - .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- - 3 files changed, 21 insertions(+), 17 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 51aa052a0c7..31aeeb9b425 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -183,13 +183,20 @@ class Address { - : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } - Address(Register r) - : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } -- -- template::value)> -- Address(Register r, T o) -- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} -- -+ Address(Register r, int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) {} -+ : Address(r, in_bytes(disp)) { } - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 6e4d22db40f..b95f69cfcda 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - --void MacroAssembler::mv(Register Rd, address addr) { -- // Here in case of use with relocation, use fix length instruciton -- // movptr instead of li -- movptr(Rd, addr); --} -- - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if (src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c1ffa120774..76b2716659b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { - } - - // mv -- template::value)> -- inline void mv(Register Rd, T o) { -- li(Rd, (int64_t)o); -- } -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } - - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -- void mv(Register Rd, address addr); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 16:54:36 +0800 -Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of - allocation.hpp where possible - ---- - src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - - 2 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp -index 23d982f9abd..f60e0e38ae8 100644 ---- a/src/hotspot/cpu/riscv/bytes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -27,8 +27,6 @@ - #ifndef CPU_RISCV_BYTES_RISCV_HPP - #define CPU_RISCV_BYTES_RISCV_HPP - --#include "memory/allStatic.hpp" -- - class Bytes: AllStatic { - public: - // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index 83ffcc55d83..bc4e5758256 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -27,7 +27,6 @@ - #define CPU_RISCV_JNITYPES_RISCV_HPP - - #include "jni.h" --#include "memory/allStatic.hpp" - #include "oops/oop.hpp" - - // This file holds platform-dependent routines used to write primitive jni - -From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:07:31 +0800 -Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 - LIR_OprDesc causes SEGV in fastdebug build - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + - 3 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index af7bd067f33..6057d43296b 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) - } - - RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { - assert(info != NULL, "must have info"); - _info = new CodeEmitInfo(info); - } -@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { - if (_throw_index_out_of_bounds_exception) { - stub_id = 
Runtime1::throw_index_exception_id; - } else { -- assert(_array != LIR_Opr::nullOpr(), "sanity"); -+ assert(_array != NULL, "sanity"); - __ mv(t1, _array->as_pointer_register()); - stub_id = Runtime1::throw_range_check_failed_id; - } -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 172031941b2..1f8b2b55100 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; - LIR_Opr FrameMap::fpu10_float_opr; - LIR_Opr FrameMap::fpu10_double_opr; - --LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; --LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; - - //-------------------------------------------------------- - // FrameMap -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index c45a75b2301..227e7664225 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { - break; - default: - ShouldNotReachHere(); -+ r = NULL; - } - return r; - } - -From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:19:19 +0800 -Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling - convention handling - ---- - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ - 4 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -index 6057d43296b..12980c12de6 100644 ---- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { - const int args_num = 5; - VMRegPair args[args_num]; - BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -- SharedRuntime::java_calling_convention(signature, args, args_num); -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); - - // push parameters - Register r[args_num]; -diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -index 1f8b2b55100..682ebe82627 100644 ---- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -314,7 +314,7 @@ void FrameMap::initialize() { - - VMRegPair regs; - BasicType sig_bt = T_OBJECT; -- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); - receiver_opr = as_oop_opr(regs.first()->as_Register()); - - for (i = 0; i < nof_caller_save_fpu_regs; i++) { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index a6061de7a33..1667994699f 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2443,6 +2443,12 @@ frame %{ - // Stack alignment requirement - stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) - -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. 
The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. RISC-V needs two slots for -+ // return address and fp. -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ - // Number of outgoing stack slots killed above the out_preserve_stack_slots - // for calls to C. Supports the var-args backing area for register parms. - varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); -@@ -2461,6 +2467,25 @@ frame %{ - Compile::current()->fixed_slots()), - stack_alignment_in_slots())); - -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ - // Location of compiled Java return values. Same as C for now. - return_value - %{ -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index eaefcc2b595..411bddd2ace 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { - - int SharedRuntime::java_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, -- int total_args_passed) { -+ int total_args_passed, -+ int is_outgoing) { - // Create the mapping between argument positions and - // registers. - static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { -@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { - _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); - } - --// Number of stack slots between incoming argument block and the start of --// a new frame. The PROLOG must add this many slots to the stack. The --// EPILOG must remove this many slots. --// RISCV needs two words for RA (return address) and FP (frame pointer). 
--uint SharedRuntime::in_preserve_stack_slots() { -- return 2 * VMRegImpl::slots_per_word; --} -- - uint SharedRuntime::out_preserve_stack_slots() { - return 0; - } - -From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:27:57 +0800 -Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 10 files changed, 3 insertions(+), 12 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 4c1c13dc290..65d0eda62ef 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); - break; - case lir_div: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move lreg_lo to dreg if divisor is 1 - __ mv(dreg, lreg_lo); -@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - } - break; - case lir_rem: -- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); -+ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); - if (c == 1) { - // move 0 to dreg if divisor is 1 - __ mv(dreg, zr); -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 90c4af5d3b0..9de89a3b026 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -41,7 +41,6 @@ - #include "oops/objArrayKlass.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/sharedRuntime.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 227e7664225..a9345158749 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -38,7 +38,6 @@ - #include "ci/ciTypeArrayKlass.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - #ifdef ASSERT -@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - // no need to do div-by-zero check if the divisor is a non-zero constant - if (c != 0) { need_zero_check = false; } - // do not load right if the divisor is a power-of-2 constant -- if (c > 0 && is_power_of_2(c)) { -+ if (c > 0 && is_power_of_2_long(c)) { - right.dont_load_item(); - } else { - right.load_item(); -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index 1f45fba9de0..fc88d5c180e 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -46,7 +46,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/vframe.hpp" - #include "runtime/vframeArray.hpp" --#include "utilities/powerOfTwo.hpp" - #include "vmreg_riscv.inline.hpp" - - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index f0c249f0d26..2fc0b00e2cb 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" - #include "runtime/thread.inline.hpp" --#include "utilities/powerOfTwo.hpp" - - void InterpreterMacroAssembler::narrow(Register result) { - // Get method->_constMethod->_result_type -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b95f69cfcda..41a415ef2cf 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -47,7 +47,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" - #include "opto/node.hpp" -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 76b2716659b..dd39f67d507 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -30,7 +30,6 @@ - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" - #include "oops/compressedOops.hpp" --#include "utilities/powerOfTwo.hpp" - - // MacroAssembler extends Assembler by frequently used macros. 
- // -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 8392b768847..0c5b0e001ee 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -45,7 +45,6 @@ - #include "runtime/stubRoutines.hpp" - #include "runtime/thread.inline.hpp" - #include "utilities/align.hpp" --#include "utilities/powerOfTwo.hpp" - #ifdef COMPILER2 - #include "opto/runtime.hpp" - #endif -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 5a87c687cf7..a10677bf650 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -51,7 +51,6 @@ - #include "runtime/timer.hpp" - #include "runtime/vframeArray.hpp" - #include "utilities/debug.hpp" --#include "utilities/powerOfTwo.hpp" - #include - - #ifndef PRODUCT -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 158294f7436..2a92fb9dd49 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -44,7 +44,6 @@ - #include "runtime/sharedRuntime.hpp" - #include "runtime/stubRoutines.hpp" - #include "runtime/synchronizer.hpp" --#include "utilities/powerOfTwo.hpp" - - #define __ _masm-> - - -From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:34:39 +0800 -Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 9de89a3b026..70ee6295bfb 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - assert(op->addr()->is_address(), "what else?"); - LIR_Address* addr_ptr = op->addr()->as_address_ptr(); - assert(addr_ptr->disp() == 0, "need 0 disp"); -- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); - addr = as_reg(addr_ptr->base()); - } - Register newval = as_reg(op->new_value()); -diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -index 5f1c394ab3d..0317ed9003e 100644 ---- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp -@@ -27,22 +27,22 @@ - #include "asm/register.hpp" - #include "c1/c1_LIR.hpp" - --FloatRegister LIR_Opr::as_float_reg() const { -+FloatRegister LIR_OprDesc::as_float_reg() const { - return as_FloatRegister(fpu_regnr()); - } - --FloatRegister LIR_Opr::as_double_reg() const { -+FloatRegister LIR_OprDesc::as_double_reg() const { - return as_FloatRegister(fpu_regnrLo()); - } - - // Reg2 unused. 
- LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { - assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | -- (reg1 << LIR_Opr::reg2_shift) | -- LIR_Opr::double_type | -- LIR_Opr::fpu_register | -- LIR_Opr::double_size); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); - } - - #ifndef PRODUCT - -From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:39:16 +0800 -Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all - architectures - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- - .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- - 2 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 70ee6295bfb..e29c0df5f8b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po - } - } - --void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { - LIR_Address* to_addr = dest->as_address_ptr(); - // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src - Register compressed_src = t1; -@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { - reg2stack(temp, dest, dest->type(), false); - } - --void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { - assert(src->is_address(), "should not call otherwise"); - assert(dest->is_register(), "should not call otherwise"); - -@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - Label done; - move_op(opr2, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ j(done); - __ bind(label); - move_op(opr1, result, type, lir_patch_none, NULL, - false, // pop_fpu_stack -+ false, // unaligned - false); // wide - __ bind(done); - } -@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg - - void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { - if (dest->is_address() || src->is_address()) { -- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); - } else { - ShouldNotReachHere(); - } -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index d724876ec3a..bc847388f68 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier - Register pre_val_reg = stub->pre_val()->as_register(); - - if (stub->do_load()) { -- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); - } - __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); - ce->store_parameter(stub->pre_val()->as_register(), 0); - -From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:54:05 +0800 -Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental - Ahead-of-Time Compiler - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -index 051328c3a8a..5c81f1c704c 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -73,6 +73,7 @@ friend class ArrayCopyStub; - // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) - _call_stub_size = 14 * NativeInstruction::instruction_size + - (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, - // See emit_exception_handler for detail - // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) - _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -index 1cfc92b28fa..a29e5be9dbb 100644 ---- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { - } - - void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - guarantee(stub != NULL, "stub not found"); - - if (TraceICs) { -@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { - _call->verify_alignment(); - - // Verify stub. -- address stub = find_stub(); -+ address stub = find_stub(false /* is_aot */); - assert(stub != NULL, "no stub found for static call"); - // Creation also verifies the object. 
- NativeMovConstReg* method_holder - -From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 17:59:45 +0800 -Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- - 1 file changed, 12 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index e29c0df5f8b..49653d04d81 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ ld(dest->as_register(), as_Address(from_addr)); - break; - case T_ADDRESS: -- __ ld(dest->as_register(), as_Address(from_addr)); -+ // FIXME: OMG this is a horrible kludge. Any offset from an -+ // address that matches klass_offset_in_bytes() will be loaded -+ // as a word, not a long. -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } - break; - case T_INT: - __ lw(dest->as_register(), as_Address(from_addr)); -@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch - __ decode_heap_oop(dest->as_register()); - } - __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); -+ } - } - } - -@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { - __ bind(*op->stub()->continuation()); - } - --void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { -- Register obj = op->obj()->as_pointer_register(); -- Register result = op->result_opr()->as_pointer_register(); -- -- CodeEmitInfo* info = op->info(); -- if (info != NULL) { -- add_debug_info_for_null_check_here(info); -- } -- -- if (UseCompressedClassPointers) { -- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- __ decode_klass_not_null(result); -- } else { -- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); -- } --} -- - void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { - ciMethod* method = op->profiled_method(); - int bci = op->profiled_bci(); - -From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:09:31 +0800 -Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type - after SPARC port removal - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index a9345158749..2aba4f4974f 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { - if (need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); - } - - rlock_result(x); -@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { - if 
(need_zero_check) { - CodeEmitInfo* info = state_for(x); - __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -- __ branch(lir_cond_equal, new DivByZeroStub(info)); -+ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); - } - - LIR_Opr ill = LIR_OprFact::illegalOpr; -@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { - profile_branch(x, cond); - move_to_phi(x->state()); - if (x->x()->type()->is_float_kind()) { -- __ branch(lir_cond(cond), x->tsux(), x->usux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); - } else { -- __ branch(lir_cond(cond), x->tsux()); -+ __ branch(lir_cond(cond), right->type(), x->tsux()); - } - assert(x->default_sux() == x->fsux(), "wrong destination above"); - __ jump(x->default_sux()); - -From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:14:10 +0800 -Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for - non-strict floating-point execution - ---- - src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -index 65d0eda62ef..2a99d49c94b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; - default: - ShouldNotReachHere(); -@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig - switch (code) { - case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through - case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through - case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; - default: - ShouldNotReachHere(); -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 2aba4f4974f..21ae066e9ab 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { - right.load_item(); - - LIR_Opr reg = rlock(x); -- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); - - set_result(x, round_item(reg)); - } - -From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:19:51 +0800 -Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics - handling - ---- - src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index 21ae066e9ab..f9242251491 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - do_LibmIntrinsic(x); - break; - case vmIntrinsics::_dabs: // fall through -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - assert(x->number_of_arguments() == 1, "wrong type"); - LIRItem value(x->argument_at(0), this); - value.load_item(); - LIR_Opr dst = rlock_result(x); - - switch (x->id()) { -- case vmIntrinsics::_dsqrt: // fall through -- case vmIntrinsics::_dsqrt_strict: { -+ case vmIntrinsics::_dsqrt: { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); - break; - } - -From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:35:08 +0800 -Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the - parameter 'size' to SharedRuntime::dtrace_object_alloc(_base) - ---- - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -index fc88d5c180e..329df2e1ca7 100644 ---- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { - StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); - save_live_registers(sasm); - -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); - - restore_live_registers(sasm); - } -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 2a92fb9dd49..ddc9498dddc 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -3577,7 +3577,7 @@ void TemplateTable::_new() { - SkipIfEqual skip(_masm, &DTraceAllocProbes, false); - // Trigger dtrace event for fastpath - __ push(atos); // save the return value -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); - __ pop(atos); // restore the return value - } - __ j(done); - -From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:38:51 +0800 -Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord - ---- - src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 - - src/hotspot/cpu/riscv/frame_riscv.cpp | 1 - - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - - 3 files changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index e486f41948e..44ceccd8bd1 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -33,7 +33,6 @@ - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interpreter.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "runtime/basicLock.hpp" - #include "runtime/biasedLocking.hpp" - #include "runtime/os.hpp" -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 13c482b610a..050595389e9 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -29,7 +29,6 @@ - #include "interpreter/interpreter.hpp" - #include "memory/resourceArea.hpp" - #include "memory/universe.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/oop.inline.hpp" - #include "prims/methodHandles.hpp" -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 2fc0b00e2cb..006fe49b155 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -33,7 +33,6 @@ - #include "interpreter/interpreterRuntime.hpp" - #include "logging/log.hpp" - #include "oops/arrayOop.hpp" --#include "oops/markWord.hpp" - #include "oops/method.hpp" - #include "oops/methodData.hpp" - #include "prims/jvmtiExport.hpp" - -From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:42:33 +0800 -Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control - flags - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fe46f7b21c8..fd25f8f9afd 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); - define_pd_global(intx, CompileThreshold, 1500 ); - - define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); - define_pd_global(intx, NewSizeThreadIncrease, 4*K ); - define_pd_global(intx, InitialCodeCacheSize, 160*K); - define_pd_global(intx, ReservedCodeCacheSize, 32*M ); - -From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:45:00 +0800 -Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of - MetaspaceSize - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index fd25f8f9afd..1c55a23eecf 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); - define_pd_global(intx, CodeCacheExpansionSize, 32*K ); - define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 53a41665f4b..d9e5fcc1bb0 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); - define_pd_global(uintx, CodeCacheMinBlockLength, 6); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ - // Ergonomics related flags - define_pd_global(bool, NeverActAsServerClassMachine, false); - - -From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:48:15 +0800 -Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 1c55a23eecf..bd8d039de03 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); - -+define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - define_pd_global(bool, CSEArrayLength, false); - define_pd_global(bool, TwoOperandLIRForm, false); - -From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:50:27 +0800 -Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty - declaring a method strictfp on strict-only platforms - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index bd8d039de03..16a87b7aced 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !COMPILER2 - define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); - - define_pd_global(bool, LIRFillDelaySlots, false); - define_pd_global(bool, OptimizeSinglePrecision, true ); - -From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Tue, 25 Apr 2023 18:58:36 +0800 -Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- - .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ - 5 files changed, 210 insertions(+), 88 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 16a87b7aced..8f2f4e0e81d 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -32,7 +32,7 @@ - // Sets the default values for platform dependent flags used by the client compiler. 
- // (see c1_globals.hpp) - --#ifndef COMPILER2 -+#ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); -@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); - define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); --#endif // !COMPILER2 -+#endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); - define_pd_global(bool, RoundFPResults, true ); - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index d9e5fcc1bb0..6c301cdae04 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, true); - define_pd_global(bool, UseOnStackReplacement, true); - define_pd_global(bool, ProfileInterpreter, true); --define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); -+define_pd_global(bool, TieredCompilation, trueInTiered); - define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 50bbb6a77b8..b78f258a764 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - --define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index a10677bf650..8aea4eca048 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, - // - // xmethod: method - // --void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { -+void TemplateInterpreterGenerator::generate_counter_incr( -+ Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { - Label done; - // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -- int increment = InvocationCounter::count_increment; -- Label no_mdo; -- if (ProfileInterpreter) { -- // Are we profiling? 
-- __ ld(x10, Address(xmethod, Method::method_data_offset())); -- __ beqz(x10, no_mdo); -- // Increment counter in the MDO -- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -- __ j(done); -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ -+ __ get_method_counters(xmethod, t1, done); -+ -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); -+ -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); -+ -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters -+ -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call -+ -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); -+ -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); -+ } -+ -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); -+ } -+ __ bind(done); - } -- __ bind(no_mdo); -- // Increment counter in MethodCounters -- const Address invocation_counter(t1, -- MethodCounters::invocation_counter_offset() + -- InvocationCounter::counter_offset()); -- __ get_method_counters(xmethod, t1, done); -- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -- __ bind(done); - } - - void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { -@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - // increment invocation count & check for overflow - Label invocation_counter_overflow; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - } - - Label continue_after_compile; -@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // increment invocation count & check for overflow - Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; - if (inc_counter) { -- generate_counter_incr(&invocation_counter_overflow); -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } - } - - Label continue_after_compile; -@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - - // invocation counter overflow - if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ j(profile_method_continue); -+ } - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(continue_after_compile); -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index ddc9498dddc..bb20f228447 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - assert(UseLoopCounter || !UseOnStackReplacement, - "on-stack-replacement requires loop counters"); - Label backedge_counter_overflow; -+ Label profile_method; - Label dispatch; - if (UseLoopCounter) { - // increment backedge counter for backward branches -@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory - __ bind(has_counters); - -- Label no_mdo; -- int increment = InvocationCounter::count_increment; -- if (ProfileInterpreter) { -- // Are we profiling? -- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -- __ beqz(x11, no_mdo); -- // Increment the MDO backedge counter -- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -- in_bytes(InvocationCounter::counter_offset())); -- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -- x10, t0, false, -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? 
-+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, - UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -- __ j(dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter -+ -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters -+ -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); -+ -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); -+ -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower -+ -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. -+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); -+ -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } - } -- __ bind(no_mdo); -- // Increment backedge counter in MethodCounters* -- __ ld(t0, Address(xmethod, Method::method_counters_offset())); -- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -- x10, t1, false, -- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); - __ bind(dispatch); - } -- - // Pre-load the next target bytecode into t0 - __ load_unsigned_byte(t0, Address(xbcp, 0)); - -@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) - // xbcp: target bcp - __ dispatch_only(vtos, /*generate_poll*/true); - -- if (UseLoopCounter && UseOnStackReplacement) { -- // invocation counter overflow -- __ bind(backedge_counter_overflow); -- __ neg(x12, x12); -- __ add(x12, x12, xbcp); // branch xbcp -- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -- __ call_VM(noreg, -- CAST_FROM_FN_PTR(address, -- InterpreterRuntime::frequency_counter_overflow), -- x12); -- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -- -- // x10: osr nmethod (osr ok) or NULL (osr not possible) -- // w11: target bytecode -- // x12: temporary -- __ beqz(x10, dispatch); // test result -- no osr if null -- // nmethod may have been invalidated (VM may block upon call_VM return) -- __ lbu(x12, Address(x10, nmethod::state_offset())); -- if (nmethod::in_use != 0) { -- __ sub(x12, x12, nmethod::in_use); -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); - } -- __ bnez(x12, dispatch); - -- // We have the address of an on stack replacement routine in x10 -- // We need to prepare to execute the OSR method. First we must -- // migrate the locals and monitors off of the stack. -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); -+ -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. 
- -- __ mv(x9, x10); // save the nmethod -+ __ mv(x9, x10); // save the nmethod - -- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); - -- // x10 is OSR buffer, move it to expected parameter location -- __ mv(j_rarg0, x10); -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); - -- // remove activation -- // get sender esp -- __ ld(esp, -- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -- // remove frame anchor -- __ leave(); -- // Ensure compiled code always sees stack at proper alignment -- __ andi(sp, esp, -16); -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); - -- // and begin the OSR nmethod -- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -- __ jr(t0); -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } - } - } - - -From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 11:37:05 +0800 -Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- - 2 files changed, 15 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index c0491d23fa6..d4b79162d84 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -97,11 +97,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); - } - -- if (UseMD5Intrinsics) { -- warning("MD5 intrinsics are not available on this CPU."); -- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); -- } -- - if (UseRVV) { - if (!(_features & CPU_V)) { - warning("RVV is not supported on this CPU"); -diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -index 2ecfec07a4c..8566d57c391 100644 ---- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -24,7 +24,7 @@ - - package compiler.intrinsics.sha.cli.testcases; - --import compiler.intrinsics.sha.cli.DigestOptionsBase; -+import compiler.intrinsics.sha.cli.SHAOptionsBase; - import jdk.test.lib.process.ExitCode; - import jdk.test.lib.Platform; - import jdk.test.lib.cli.CommandLineOptionTest; -@@ -36,7 +36,7 @@ - * which don't support instruction required by the tested option. 
- */ - public class GenericTestCaseForUnsupportedRISCV64CPU extends -- DigestOptionsBase.TestCase { -+ SHAOptionsBase.TestCase { - - final private boolean checkUseSHA; - -@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { - - public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { - super(optionName, new AndPredicate(Platform::isRISCV64, -- new NotPredicate(DigestOptionsBase.getPredicateForOption( -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( - optionName)))); - - this.checkUseSHA = checkUseSHA; -@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { - + "option '-XX:-%s' without any warnings", optionName); - //Verify that option could be disabled without any warnings. - CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) - }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, false)); - - if (checkUseSHA) { - shouldPassMessage = String.format("If JVM is started with '-XX:-" - + "%s' '-XX:+%s', output should contain warning.", -- DigestOptionsBase.USE_SHA_OPTION, optionName); -+ SHAOptionsBase.USE_SHA_OPTION, optionName); - - // Verify that when the tested option is enabled, then - // a warning will occur in VM output if UseSHA is disabled. -- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { - CommandLineOptionTest.verifySameJVMStartup( -- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, - null, - shouldPassMessage, - shouldPassMessage, - ExitCode.OK, -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - } - } -@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be disabled by default", - optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); - - if (checkUseSHA) { - // Verify that option is disabled even if it was explicitly enabled -@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { - CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if set to true directly", optionName), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag(optionName, true)); - - // Verify that option is disabled when +UseSHA was passed to JVM. 
-@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { - String.format("Option '%s' should be off on unsupported " - + "RISCV64CPU even if %s flag set to JVM", - optionName, CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)), -- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, - CommandLineOptionTest.prepareBooleanFlag( -- DigestOptionsBase.USE_SHA_OPTION, true)); -+ SHAOptionsBase.USE_SHA_OPTION, true)); - } - } - } - -From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 27 Apr 2023 15:41:48 +0800 -Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed - scoped enums - ---- - src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp -index 31aeeb9b425..9959ac1d02c 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -195,8 +195,10 @@ class Address { - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } - Address(Register r, unsigned long long o) - : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT - Address(Register r, ByteSize disp) -- : Address(r, in_bytes(disp)) { } -+ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } -+#endif - Address(address target, RelocationHolder const& rspec) - : _base(noreg), - _index(noreg), - -From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:18:12 +0800 -Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register - stack functions - ---- - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 61e2cf85b63..313a7b932c3 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,15 +34,31 @@ - frame pd_last_frame(); - - public: -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ - static ByteSize last_Java_fp_offset() { - return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); - } - -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ - bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, - bool isInJava); - - bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); - private: - bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+public: -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). 
-+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} - - #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP - -From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:22:30 +0800 -Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread - functions for set_last_Java_fp/pc - ---- - src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -index 9a6084afa1d..5a0c9b812fc 100644 ---- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -83,4 +83,7 @@ - - intptr_t* last_Java_fp(void) { return _last_Java_fp; } - -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +new file mode 100644 +index 0000000000..19f64b8ce2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + - #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -index 313a7b932c3..4b91fa855ae 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -34,6 +34,9 @@ - frame pd_last_frame(); - - public: -+ // Mutators are highly dangerous.... 
-+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } - - void set_base_of_stack_pointer(intptr_t* base_sp) { - } - -From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 15:45:56 +0800 -Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept - pointer to const - ---- - src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -index a6432c84ec7..2bd48e09c34 100644 ---- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp -@@ -29,7 +29,7 @@ - #include "runtime/prefetch.hpp" - - --inline void Prefetch::read (const void *loc, intx interval) { -+inline void Prefetch::read (void *loc, intx interval) { - } - - inline void Prefetch::write(void *loc, intx interval) { - -From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 16:14:55 +0800 -Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker - API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory - API (Incubator) - ---- - .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- - .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- - src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- - src/hotspot/cpu/riscv/riscv.ad | 5 --- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- - .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- - .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- - 9 files changed, 2 insertions(+), 196 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp - delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -deleted file mode 100644 -index 5c700be9c91..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp -+++ /dev/null -@@ -1,44 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/foreign_globals.hpp" --#include "utilities/debug.hpp" -- --// Stubbed out, implement later --const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { -- Unimplemented(); -- return {}; --} -- --const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { -- Unimplemented(); -- return {}; --} -- --const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { -- ShouldNotCallThis(); -- return {}; --} -diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -deleted file mode 100644 -index 3ac89752c27..00000000000 ---- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp -+++ /dev/null -@@ -1,32 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP --#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -- --class ABIDescriptor {}; --class BufferLayout {}; -- --#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 050595389e9..40ec584b994 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { - return fr; - } - --OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool frame::optimized_entry_frame_is_first() const { -- ShouldNotCallThis(); -- return false; --} -- --frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { -- ShouldNotCallThis(); -- return {}; --} -- - //------------------------------------------------------------------------------ - // frame::verify_deopt_original_pc - // -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 1f7c0c87c21..3bf5cfb16c3 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* - return NULL; - } - -- // No need in interpreter entry for linkToNative for now. -- // Interpreter calls compiled entry through i2c. -- if (iid == vmIntrinsics::_linkToNative) { -- __ ebreak(); -- return NULL; -- } -- - // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) - // xmethod: Method* - // x13: argument locator (parameter slot count, added to sp) -@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - assert_different_registers(temp1, temp2, temp3, receiver_reg); - assert_different_registers(temp1, temp2, temp3, member_reg); - -- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -- if (iid == vmIntrinsics::_linkToNative) { -- assert(for_compiler_entry, "only compiler entry is supported"); -- } -+ if (iid == vmIntrinsics::_invokeBasic) { - // indirect through MH.form.vmentry.vmtarget - jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); - } else { -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1667994699f..7ec76e72ff0 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { - } - } - --int MachCallNativeNode::ret_addr_offset() { -- Unimplemented(); -- return -1; --} -- - // - // Compute padding required for nodes which need alignment - // -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 411bddd2ace..897dafcc99c 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument - member_reg = x9; // known to be free at this point - has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -- } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { -+ } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { - fatal("unexpected intrinsic id %d", 
vmIntrinsics::as_int(iid)); -@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha - } - - #ifdef COMPILER2 --RuntimeStub* SharedRuntime::make_native_invoker(address call_target, -- int shadow_space_bytes, -- const GrowableArray& input_registers, -- const GrowableArray& output_registers) { -- Unimplemented(); -- return nullptr; --} -- - //------------------------------generate_exception_blob--------------------------- - // creates exception blob at the end - // Using exception blob, this code is jumped from a compiled method. -diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -deleted file mode 100644 -index 4f50adb05c3..00000000000 ---- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp -+++ /dev/null -@@ -1,33 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalNativeInvoker.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -deleted file mode 100644 -index ce70da72f2e..00000000000 ---- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "prims/universalUpcallHandler.hpp" --#include "utilities/debug.hpp" -- --address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { -- Unimplemented(); -- return nullptr; --} -- --address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { -- ShouldNotCallThis(); -- return nullptr; --} -- --bool ProgrammableUpcallHandler::supports_optimized_upcalls() { -- return false; --} -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 1f6eff96cba..5d1187c2a27 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() { - regName[i] = "NON-GPR-FPR"; - } - } -- --VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { -- Unimplemented(); -- return VMRegImpl::Bad(); --} - -From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:35:20 +0800 -Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum - class - ---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 006fe49b155..1133e80a210 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, - beq(t0, tmp, do_profile); - get_method(tmp); - lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -- li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); -+ li(t1, vmIntrinsics::_compiledLambdaForm); - bne(t0, t1, profile_continue); - bind(do_profile); - } -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 3bf5cfb16c3..4442b5991b1 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - } - - default: -- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); - break; - } - -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 897dafcc99c..5b934b04e8e 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm, - } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { -- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); -+ fatal("unexpected intrinsic id %d", iid); - } - - if (member_reg != noreg) { - -From 245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Thu, 6 Apr 2023 20:27:58 +0800 -Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for - Concurrent Class Unloading - ---- - 
.../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - - .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- - .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - - .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ - .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- - .../cpu/riscv/macroAssembler_riscv.hpp | 2 - - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 16 -- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - - 12 files changed, 5 insertions(+), 361 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index 44ceccd8bd1..a6d1b1470f9 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - // Note that we do this before creating a frame. - generate_stack_overflow_check(bang_size_in_bytes); - MacroAssembler::build_frame(framesize); -- -- // Insert nmethod entry barrier into frame. -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(this); - } - - void C1_MacroAssembler::remove_frame(int framesize) { -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -index 3c115a2ea02..2b556b95d71 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -27,7 +27,6 @@ - #include "classfile/classLoaderData.hpp" - #include "gc/shared/barrierSet.hpp" - #include "gc/shared/barrierSetAssembler.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "gc/shared/collectedHeap.hpp" - #include "interpreter/interp_masm.hpp" - #include "memory/universe.hpp" -@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, - } - __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); - } -- --void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- -- if (bs_nm == NULL) { -- return; -- } -- -- // RISCV atomic operations require that the memory address be naturally aligned. -- __ align(4); -- -- Label skip, guard; -- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); -- -- __ lwu(t0, guard); -- -- // Subsequent loads of oops must occur after load of guard value. -- // BarrierSetNMethod::disarm sets guard with release semantics. -- __ membar(MacroAssembler::LoadLoad); -- __ lwu(t1, thread_disarmed_addr); -- __ beq(t0, t1, skip); -- -- int32_t offset = 0; -- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); -- __ jalr(ra, t0, offset); -- __ j(skip); -- -- __ bind(guard); -- -- assert(__ offset() % 4 == 0, "bad alignment"); -- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
-- -- __ bind(skip); --} -- --void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { -- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs == NULL) { -- return; -- } -- -- Label bad_call; -- __ beqz(xmethod, bad_call); -- -- // Pointer chase to the method holder to find out if the method is concurrently unloading. -- Label method_live; -- __ load_method_holder_cld(t0, xmethod); -- -- // Is it a strong CLD? -- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); -- __ bnez(t1, method_live); -- -- // Is it a weak but alive CLD? -- __ push_reg(RegSet::of(x28, x29), sp); -- -- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); -- -- // Uses x28 & x29, so we must pass new temporaries. -- __ resolve_weak_handle(x28, x29); -- __ mv(t0, x28); -- -- __ pop_reg(RegSet::of(x28, x29), sp); -- -- __ bnez(t0, method_live); -- -- __ bind(bad_call); -- -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(method_live); --} -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -index b85f7f5582b..984d94f4c3d 100644 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -28,7 +28,6 @@ - - #include "asm/macroAssembler.hpp" - #include "gc/shared/barrierSet.hpp" --#include "gc/shared/barrierSetNMethod.hpp" - #include "memory/allocation.hpp" - #include "oops/access.hpp" - -@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { - ); - virtual void barrier_stubs_init() {} - -- virtual void nmethod_entry_barrier(MacroAssembler* masm); -- virtual void c2i_entry_barrier(MacroAssembler* masm); - virtual ~BarrierSetAssembler() {} - }; - -diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -deleted file mode 100644 -index ae7ee4c5a44..00000000000 ---- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp -+++ /dev/null -@@ -1,171 +0,0 @@ --/* -- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "code/codeCache.hpp" --#include "code/nativeInst.hpp" --#include "gc/shared/barrierSetNMethod.hpp" --#include "logging/log.hpp" --#include "memory/resourceArea.hpp" --#include "runtime/sharedRuntime.hpp" --#include "runtime/registerMap.hpp" --#include "runtime/thread.hpp" --#include "utilities/align.hpp" --#include "utilities/debug.hpp" -- --class NativeNMethodBarrier: public NativeInstruction { -- address instruction_address() const { return addr_at(0); } -- -- int *guard_addr() { -- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ -- return reinterpret_cast(instruction_address() + 12 * 4); -- } -- --public: -- int get_value() { -- return Atomic::load_acquire(guard_addr()); -- } -- -- void set_value(int value) { -- Atomic::release_store(guard_addr(), value); -- } -- -- void verify() const; --}; -- --// Store the instruction bitmask, bits and name for checking the barrier. --struct CheckInsn { -- uint32_t mask; -- uint32_t bits; -- const char *name; --}; -- --static const struct CheckInsn barrierInsn[] = { -- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, -- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, -- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, -- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, -- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, -- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, -- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, -- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, -- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, -- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, -- { 0x00000fff, 0x0000006f, "j skip "} -- /* guard: */ -- /* 32bit nmethod guard value */ -- /* skip: */ --}; -- --// The encodings must match the instructions emitted by --// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific --// register numbers and immediate values in the encoding. --void NativeNMethodBarrier::verify() const { -- intptr_t addr = (intptr_t) instruction_address(); -- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { -- uint32_t inst = *((uint32_t*) addr); -- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { -- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); -- fatal("not an %s instruction.", barrierInsn[i].name); -- } -- addr += 4; -- } --} -- -- --/* We're called from an nmethod when we need to deoptimize it. We do -- this by throwing away the nmethod's frame and jumping to the -- ic_miss stub. This looks like there has been an IC miss at the -- entry of the nmethod, so we resolve the call, which will fall back -- to the interpreter if the nmethod has been unloaded. 
*/ --void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { -- -- typedef struct { -- intptr_t *sp; intptr_t *fp; address ra; address pc; -- } frame_pointers_t; -- -- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); -- -- JavaThread *thread = JavaThread::current(); -- RegisterMap reg_map(thread, false); -- frame frame = thread->last_frame(); -- -- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); -- assert(frame.cb() == nm, "must be"); -- frame = frame.sender(®_map); -- -- LogTarget(Trace, nmethod, barrier) out; -- if (out.is_enabled()) { -- ResourceMark mark; -- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", -- nm->method()->name_and_sig_as_C_string(), -- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, -- thread->name(), frame.sp(), nm->verified_entry_point()); -- } -- -- new_frame->sp = frame.sp(); -- new_frame->fp = frame.fp(); -- new_frame->ra = frame.pc(); -- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); --} -- --// This is the offset of the entry barrier from where the frame is completed. --// If any code changes between the end of the verified entry where the entry --// barrier resides, and the completion of the frame, then --// NativeNMethodCmpBarrier::verify() will immediately complain when it does --// not find the expected native instruction at this offset, which needs updating. --// Note that this offset is invariant of PreserveFramePointer. -- --// see BarrierSetAssembler::nmethod_entry_barrier --// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 --static const int entry_barrier_offset = -4 * 13; -- --static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { -- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; -- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); -- debug_only(barrier->verify()); -- return barrier; --} -- --void BarrierSetNMethod::disarm(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return; -- } -- -- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- -- barrier->set_value(disarmed_value()); --} -- --bool BarrierSetNMethod::is_armed(nmethod* nm) { -- if (!supports_entry_barrier(nm)) { -- return false; -- } -- -- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); -- return barrier->get_value() != disarmed_value(); --} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 41a415ef2cf..a75bd9dfa89 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, - beq(trial_klass, tmp, L); - } - --// Move an oop into a register. immediate is true if we want --// immediate instructions and nmethod entry barriers are not enabled. --// i.e. we are not going to patch this instruction while the code is being --// executed by another thread. -+// Move an oop into a register. immediate is true if we want -+// immediate instructions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. 
- void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - int oop_index; - if (obj == NULL) { -@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { - oop_index = oop_recorder()->find_index(obj); - } - RelocationHolder rspec = oop_Relocation::spec(oop_index); -- -- // nmethod entry barrier necessitate using the constant pool. They have to be -- // ordered with respected to oop access. -- // Using immediate literals would necessitate fence.i. -- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { -+ if (!immediate) { - address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address - ld_constant(dst, Address(dummy, rspec)); - } else -@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { - access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); - } - --// ((WeakHandle)result).resolve() --void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { -- assert_different_registers(result, tmp); -- Label resolved; -- -- // A null weak handle resolves to null. -- beqz(result, resolved); -- -- // Only 64 bit platforms support GCs that require a tmp register -- // Only IN_HEAP loads require a thread_tmp register -- // WeakHandle::resolve is an indirection like jweak. -- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, -- result, Address(result), tmp, noreg /* tmp_thread */); -- bind(resolved); --} -- - void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, - Register dst, Address src, - Register tmp1, Register thread_tmp) { -@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder_cld(Register result, Register method) { -- load_method_holder(result, method); -- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); --} -- - void MacroAssembler::load_method_holder(Register holder, Register method) { - ld(holder, Address(method, Method::const_offset())); // ConstMethod* - ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index dd39f67d507..b16fe904888 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { - virtual void check_and_handle_earlyret(Register java_thread); - virtual void check_and_handle_popframe(Register java_thread); - -- void resolve_weak_handle(Register result, Register tmp); - void resolve_oop_handle(Register result, Register tmp = x15); - void resolve_jobject(Register value, Register thread, Register tmp); - -@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { - void cmpptr(Register src1, Address src2, Label& equal); - - void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void load_method_holder_cld(Register result, Register method); - void load_method_holder(Register holder, Register method); - - void compute_index(Register str1, Register trailing_zeros, Register match_mask, -diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -index 228a64eae2c..047ea2276ca 100644 ---- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - switch (type()) { - case relocInfo::oop_type: { - oop_Relocation *reloc = (oop_Relocation *)this; -- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate - if (NativeInstruction::is_load_pc_relative_at(addr())) { - address constptr = (address)code()->oop_addr_at(reloc->oop_index()); - bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7ec76e72ff0..0a1838695e1 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - st->print("sd ra, [sp, #%d]\n\t", - wordSize); - if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } - st->print("sub sp, sp, #%d\n\t", framesize); -- -- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { -- st->print("ld t0, [guard]\n\t"); -- st->print("membar LoadLoad\n\t"); -- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); -- st->print("beq t0, t1, skip\n\t"); -- st->print("jalr #nmethod_entry_barrier_stub\n\t"); -- st->print("j skip\n\t"); -- st->print("guard: int\n\t"); -- st->print("skip:\n\t"); -- } - } - #endif - -@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - __ build_frame(framesize); - -- if (C->stub_function() == NULL) { -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->nmethod_entry_barrier(&_masm); -- } -- - if (VerifyStackAtCalls) { - Unimplemented(); - } -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 5b934b04e8e..326ba62fcb0 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - c2i_no_clinit_check_entry = __ pc(); - } - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- bs->c2i_entry_barrier(masm); -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // -2 because return address is already present and so is saved fp - __ sub(sp, sp, stack_size - 2 * wordSize); - -- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -- assert_cond(bs != NULL); -- bs->nmethod_entry_barrier(masm); -- - // Frame is now completed as far as size and linkage. 
- int frame_complete = ((intptr_t)__ pc()) - start; - -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 0c5b0e001ee..74c38c3d044 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { - return entry; - } - -- address generate_method_entry_barrier() { -- __ align(CodeEntryAlignment); -- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); -- -- Label deoptimize_label; -- -- address start = __ pc(); -- -- __ set_last_Java_frame(sp, fp, ra, t0); -- -- __ enter(); -- __ add(t1, sp, wordSize); -- -- __ sub(sp, sp, 4 * wordSize); -- -- __ push_call_clobbered_registers(); -- -- __ mv(c_rarg0, t1); -- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); -- -- __ reset_last_Java_frame(true); -- -- __ mv(t0, x10); -- -- __ pop_call_clobbered_registers(); -- -- __ bnez(t0, deoptimize_label); -- -- __ leave(); -- __ ret(); -- -- __ BIND(deoptimize_label); -- -- __ ld(t0, Address(sp, 0)); -- __ ld(fp, Address(sp, wordSize)); -- __ ld(ra, Address(sp, wordSize * 2)); -- __ ld(t1, Address(sp, wordSize * 3)); -- -- __ mv(sp, t0); -- __ jr(t1); -- -- return start; -- } -- - // x10 = result - // x11 = str1 - // x12 = cnt1 -@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { - - generate_string_indexof_stubs(); - -- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); -- if (bs_nm != NULL) { -- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); -- } -- - StubRoutines::riscv::set_completed(); - } - -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -index 395a2d338e4..9202d9ec4b0 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; - address StubRoutines::riscv::_string_indexof_linear_uu = NULL; - address StubRoutines::riscv::_string_indexof_linear_ul = NULL; - address StubRoutines::riscv::_large_byte_array_inflate = NULL; --address StubRoutines::riscv::_method_entry_barrier = NULL; - - bool StubRoutines::riscv::_completed = false; -diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -index 51f07819c33..0c9445e18a7 100644 ---- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -67,8 +67,6 @@ class riscv { - static address _string_indexof_linear_ul; - static address _large_byte_array_inflate; - -- static address _method_entry_barrier; -- - static bool _completed; - - public: -@@ -145,10 +143,6 @@ class riscv { - return _large_byte_array_inflate; - } - -- static address method_entry_barrier() { -- return _method_entry_barrier; -- } -- - static bool complete() { - return _completed; - } - -From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 17:59:40 +0800 -Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class - initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass - class initialization barrier in c2i adapter - ---- - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- - 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- - .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- - src/hotspot/cpu/riscv/riscv.ad | 11 ------ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ - 9 files changed, 11 insertions(+), 124 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -index 49653d04d81..1e482d7cc2b 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, - - bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } - --void LIR_Assembler::clinit_barrier(ciMethod* method) { -- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); -- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, method->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); --} -- - LIR_Opr LIR_Assembler::receiverOpr() { - return FrameMap::receiver_opr; - } -diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -index a6d1b1470f9..99d981f97f4 100644 ---- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L - } - - void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); ++package sun.jvm.hotspot.debugger.proc.riscv64; + - assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); - // Make sure there is enough stack space for this method's activation. - // Note that we do this before creating a frame. -@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { - - - void C1_MacroAssembler::verified_entry() { -- // If we have to make this method not-entrant we'll overwrite its -- // first instruction with a jump. For this action to be legal we -- // must ensure that this first instruction is a J, JAL or NOP. -- // Make it a NOP. -- -- nop(); - } - - void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -index 1133e80a210..b50be7e726c 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( - ld(klass, Address(klass, Array::base_offset_in_bytes())); - } - --void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, -- Register method, -- Register cache) { -- const int method_offset = in_bytes( -- ConstantPoolCache::base_offset() + -- ((byte_no == TemplateTable::f2_byte) -- ? 
ConstantPoolCacheEntry::f2_offset() -- : ConstantPoolCacheEntry::f1_offset())); -- -- ld(method, Address(cache, method_offset)); // get f1 Method* --} -- - // Generate a subtype check: branch to ok_is_subtype if sub_klass is a - // subtype of super_klass. - // -diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -index 4d8cb086f82..4126e8ee70f 100644 ---- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { - // Load cpool->resolved_klass_at(index). - void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); - -- void load_resolved_method_at_index(int byte_no, Register method, Register cache); -- - void pop_ptr(Register r = x10); - void pop_i(Register r = x10); - void pop_l(Register r = x10); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index a75bd9dfa89..304b6f2b06c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr - sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); - } - --void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { -- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); -- assert_different_registers(klass, xthread, tmp); -- -- Label L_fallthrough, L_tmp; -- if (L_fast_path == NULL) { -- L_fast_path = &L_fallthrough; -- } else if (L_slow_path == NULL) { -- L_slow_path = &L_fallthrough; -- } -- -- // Fast path check: class is fully initialized -- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); -- sub(tmp, tmp, InstanceKlass::fully_initialized); -- beqz(tmp, *L_fast_path); -- -- // Fast path check: current thread is initializer thread -- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); -- -- if (L_slow_path == &L_fallthrough) { -- beq(xthread, tmp, *L_fast_path); -- bind(*L_slow_path); -- } else if (L_fast_path == &L_fallthrough) { -- bne(xthread, tmp, *L_slow_path); -- bind(*L_fast_path); -- } else { -- Unimplemented(); -- } --} -- - void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) { return; } - -@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { - beq(src1, t0, equal); - } - --void MacroAssembler::load_method_holder(Register holder, Register method) { -- ld(holder, Address(method, Method::const_offset())); // ConstMethod* -- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* -- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* --} -- - // string indexof - // compute index by trailing zeros - void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index b16fe904888..c6b71bdbc3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { - - void cmpptr(Register src1, Address src2, Label& equal); - -- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); -- void 
load_method_holder(Register holder, Register method); -- - void compute_index(Register str1, Register trailing_zeros, Register match_mask, - Register result, Register char_tmp, Register tmp, - bool haystack_isL); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 0a1838695e1..13546ab328b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- if (C->clinit_barrier_on_entry()) { -- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); -- -- Label L_skip_barrier; -- -- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- __ bind(L_skip_barrier); -- } -- - int bangsize = C->output()->bang_size_in_bytes(); - if (C->output()->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index 326ba62fcb0..ae414224c5b 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm - - address c2i_entry = __ pc(); - -- // Class initialization barrier for static methods -- address c2i_no_clinit_check_entry = NULL; -- if (VM_Version::supports_fast_class_init_checks()) { -- Label L_skip_barrier; -- -- { // Bypass the barrier for non-static methods -- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); -- __ andi(t1, t0, JVM_ACC_STATIC); -- __ beqz(t1, L_skip_barrier); // non-static -- } -- -- __ load_method_holder(t1, xmethod); -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- c2i_no_clinit_check_entry = __ pc(); -- } -- - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - __ flush(); -- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); - } - - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, -@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - // first instruction with a jump. 
- __ nop(); - -- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { -- Label L_skip_barrier; -- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* -- __ clinit_barrier(t1, t0, &L_skip_barrier); -- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); -- -- __ bind(L_skip_barrier); -- } -- - // Generate stack overflow check - __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index bb20f228447..1f4409a9c9a 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - const Register temp = x9; - assert_different_registers(Rcache, index, temp); - -- Label resolved, clinit_barrier_slow; -+ Label resolved; - - Bytecodes::Code code = bytecode(); - switch (code) { -@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - __ mv(t0, (int) code); - __ beq(temp, t0, resolved); - -- // resolve first time through -- // Class initialization barrier slow path lands here as well. -- __ bind(clinit_barrier_slow); -- - address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); - __ mv(temp, (int) code); - __ call_VM(noreg, entry, temp); -@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, - // n.b. unlike x86 Rcache is now rcpool plus the indexed offset - // so all clients ofthis method must be modified accordingly - __ bind(resolved); -- -- // Class initialization barrier for static methods -- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { -- __ load_resolved_method_at_index(byte_no, temp, Rcache); -- __ load_method_holder(temp, temp); -- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); -- } - } - - // The Rcache and index registers must be set before call -@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { - __ profile_virtual_call(x13, x30, x9); - - // Get declaring interface class from method, and itable index -- __ load_method_holder(x10, xmethod); -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); - __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); - __ subw(xmethod, xmethod, Method::itable_index_max); - __ negw(xmethod, xmethod); - -From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 18:36:13 +0800 -Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp - files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy - class - ---- - src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- - .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- - .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ - 3 files changed, 128 insertions(+), 112 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp - -diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp -index bceadcc5dcc..05da242e354 100644 ---- a/src/hotspot/cpu/riscv/copy_riscv.hpp -+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -27,7 +27,10 @@ - #ifndef 
CPU_RISCV_COPY_RISCV_HPP - #define CPU_RISCV_COPY_RISCV_HPP - --#include OS_CPU_HEADER(copy) -+// Inline functions for memory copy and fill. ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; + -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) - - static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { - julong* to = (julong*) tohw; -@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { - (void)memset(to, 0, count); - } - --static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- (void)memmove(to, from, count * HeapWordSize); --} -- --static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- switch (count) { -- case 8: to[7] = from[7]; // fall through -- case 7: to[6] = from[6]; // fall through -- case 6: to[5] = from[5]; // fall through -- case 5: to[4] = from[4]; // fall through -- case 4: to[3] = from[3]; // fall through -- case 3: to[2] = from[2]; // fall through -- case 2: to[1] = from[1]; // fall through -- case 1: to[0] = from[0]; // fall through -- case 0: break; -- default: -- memcpy(to, from, count * HeapWordSize); -- break; -- } --} -- --static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -- shared_disjoint_words_atomic(from, to, count); --} -- --static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_conjoint_words(from, to, count); --} -- --static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -- pd_disjoint_words(from, to, count); --} -- --static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -- (void)memmove(to, from, count); --} -- --static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -- pd_conjoint_bytes(from, to, count); --} -- --static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -- _Copy_conjoint_jshorts_atomic(from, to, count); --} -- --static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -- _Copy_conjoint_jints_atomic(from, to, count); --} -- --static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -- _Copy_conjoint_jlongs_atomic(from, to, count); --} -- --static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); --} -- --static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_bytes(from, to, count); --} -- --static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jshorts(from, to, count); --} -- --static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jints(from, to, count); --} -- --static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- --static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -- assert(!UseCompressedOops, "foo!"); -- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -- _Copy_arrayof_conjoint_jlongs(from, to, count); --} -- - #endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp 
b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -deleted file mode 100644 -index 147cfdf3c10..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp -+++ /dev/null -@@ -1,31 +0,0 @@ --/* -- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -- --// Empty for build system -- --#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp ++public class ProcRISCV64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 00000000000..bdf36d6b4c3 +index 0000000000..aecbda5902 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,124 @@ ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -75502,5044 +55330,1923 @@ index 00000000000..bdf36d6b4c3 + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++package sun.jvm.hotspot.debugger.remote.riscv64; + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); -+} ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ memcpy(to, from, count * HeapWordSize); -+ break; ++public class RemoteRISCV64Thread extends RemoteThread { ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); + } -+} + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ switch (count) { -+ case 8: to[7] = from[7]; -+ case 7: to[6] = from[6]; -+ case 6: to[5] = from[5]; -+ case 5: to[4] = from[4]; -+ case 4: to[3] = from[3]; -+ case 3: to[2] = from[2]; -+ case 2: to[1] = from[1]; -+ case 1: to[0] = from[0]; -+ case 0: break; -+ default: -+ while (count-- > 0) { -+ *to++ = *from++; -+ } -+ break; ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); + } -+} + -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } +} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +new file mode 100644 +index 0000000000..1d3da6be5a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} ++package sun.jvm.hotspot.debugger.remote.riscv64; + -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; + -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} ++public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { ++ private RemoteDebuggerClient debugger; + -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} ++ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } + -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); -+} ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } + -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } +} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +new file mode 100644 +index 0000000000..725b94e25a +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} ++package sun.jvm.hotspot.debugger.remote.riscv64; + -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; + -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} ++public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; + -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} ++ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } + -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); ++ } + -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteRISCV64Thread(debugger, id); ++ } +} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java +new file mode 100644 +index 0000000000..fb60a70427 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP - -From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 29 Apr 2023 19:33:21 +0800 -Subject: [PATCH 090/140] Revert JDK-8241436: C2: Factor out C2-specific code - from MacroAssembler - ---- - .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- - .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- - .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ - src/hotspot/cpu/riscv/riscv.ad | 124 +- - 5 files changed, 1447 insertions(+), 1524 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp - delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -deleted file mode 100644 -index 73f84a724ca..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp -+++ /dev/null -@@ -1,1321 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#include "precompiled.hpp" --#include "asm/assembler.hpp" --#include "asm/assembler.inline.hpp" --#include "opto/c2_MacroAssembler.hpp" --#include "opto/intrinsicnode.hpp" --#include "opto/subnode.hpp" --#include "runtime/stubRoutines.hpp" -- --#ifdef PRODUCT --#define BLOCK_COMMENT(str) /* nothing */ --#define STOP(error) stop(error) --#else --#define BLOCK_COMMENT(str) block_comment(str) --#define STOP(error) block_comment(error); stop(error) --#endif -- --#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -- --// short string --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL) --{ -- Register ch1 = t0; -- Register index = t1; -- -- BLOCK_COMMENT("string_indexof_char_short {"); -- -- Label LOOP, LOOP1, LOOP4, LOOP8; -- Label MATCH, MATCH1, MATCH2, MATCH3, -- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -- -- mv(result, -1); -- mv(index, zr); -- -- bind(LOOP); -- addi(t0, index, 8); -- ble(t0, cnt1, LOOP8); -- addi(t0, index, 4); -- ble(t0, cnt1, LOOP4); -- j(LOOP1); -- -- bind(LOOP8); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -- beq(ch, ch1, MATCH4); -- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -- beq(ch, ch1, MATCH5); -- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -- beq(ch, ch1, MATCH6); -- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -- beq(ch, ch1, MATCH7); -- addi(index, index, 8); -- addi(str1, str1, isL ? 8 : 16); -- blt(index, cnt1, LOOP); -- j(NOMATCH); -- -- bind(LOOP4); -- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -- beq(ch, ch1, MATCH); -- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -- beq(ch, ch1, MATCH1); -- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -- beq(ch, ch1, MATCH2); -- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -- beq(ch, ch1, MATCH3); -- addi(index, index, 4); -- addi(str1, str1, isL ? 4 : 8); -- bge(index, cnt1, NOMATCH); -- -- bind(LOOP1); -- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -- beq(ch, ch1, MATCH); -- addi(index, index, 1); -- addi(str1, str1, isL ? 
1 : 2); -- blt(index, cnt1, LOOP1); -- j(NOMATCH); -- -- bind(MATCH1); -- addi(index, index, 1); -- j(MATCH); -- -- bind(MATCH2); -- addi(index, index, 2); -- j(MATCH); -- -- bind(MATCH3); -- addi(index, index, 3); -- j(MATCH); -- -- bind(MATCH4); -- addi(index, index, 4); -- j(MATCH); -- -- bind(MATCH5); -- addi(index, index, 5); -- j(MATCH); -- -- bind(MATCH6); -- addi(index, index, 6); -- j(MATCH); -- -- bind(MATCH7); -- addi(index, index, 7); -- -- bind(MATCH); -- mv(result, index); -- bind(NOMATCH); -- BLOCK_COMMENT("} string_indexof_char_short"); --} -- --// StringUTF16.indexOfChar --// StringLatin1.indexOfChar --void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL) --{ -- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -- Register ch1 = t0; -- Register orig_cnt = t1; -- Register mask1 = tmp3; -- Register mask2 = tmp2; -- Register match_mask = tmp1; -- Register trailing_char = tmp4; -- Register unaligned_elems = tmp4; -- -- BLOCK_COMMENT("string_indexof_char {"); -- beqz(cnt1, NOMATCH); -- -- addi(t0, cnt1, isL ? -32 : -16); -- bgtz(t0, DO_LONG); -- string_indexof_char_short(str1, cnt1, ch, result, isL); -- j(DONE); -- -- bind(DO_LONG); -- mv(orig_cnt, cnt1); -- if (AvoidUnalignedAccesses) { -- Label ALIGNED; -- andi(unaligned_elems, str1, 0x7); -- beqz(unaligned_elems, ALIGNED); -- sub(unaligned_elems, unaligned_elems, 8); -- neg(unaligned_elems, unaligned_elems); -- if (!isL) { -- srli(unaligned_elems, unaligned_elems, 1); -- } -- // do unaligned part per element -- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -- bgez(result, DONE); -- mv(orig_cnt, cnt1); -- sub(cnt1, cnt1, unaligned_elems); -- bind(ALIGNED); -- } -- -- // duplicate ch -- if (isL) { -- slli(ch1, ch, 8); -- orr(ch, ch1, ch); -- } -- slli(ch1, ch, 16); -- orr(ch, ch1, ch); -- slli(ch1, ch, 32); -- orr(ch, ch1, ch); -- -- if (!isL) { -- slli(cnt1, cnt1, 1); -- } -- -- uint64_t mask0101 = UCONST64(0x0101010101010101); -- uint64_t mask0001 = UCONST64(0x0001000100010001); -- mv(mask1, isL ? mask0101 : mask0001); -- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -- mv(mask2, isL ? 
mask7f7f : mask7fff); -- -- bind(CH1_LOOP); -- ld(ch1, Address(str1)); -- addi(str1, str1, 8); -- addi(cnt1, cnt1, -8); -- compute_match_mask(ch1, ch, match_mask, mask1, mask2); -- bnez(match_mask, HIT); -- bgtz(cnt1, CH1_LOOP); -- j(NOMATCH); -- -- bind(HIT); -- ctzc_bit(trailing_char, match_mask, isL, ch1, result); -- srli(trailing_char, trailing_char, 3); -- addi(cnt1, cnt1, 8); -- ble(cnt1, trailing_char, NOMATCH); -- // match case -- if (!isL) { -- srli(cnt1, cnt1, 1); -- srli(trailing_char, trailing_char, 1); -- } -- -- sub(result, orig_cnt, cnt1); -- add(result, result, trailing_char); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof_char"); --} -- --typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); -- --// Search for needle in haystack and return index or -1 --// x10: result --// x11: haystack --// x12: haystack_len --// x13: needle --// x14: needle_len --void C2_MacroAssembler::string_indexof(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae) --{ -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register nlen_tmp = tmp1; // needle len tmp -- Register hlen_tmp = tmp2; // haystack len tmp -- Register result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_indexof {"); -- -- // Note, inline_string_indexOf() generates checks: -- // if (pattern.count > src.count) return -1; -- // if (pattern.count == 0) return 0; -- -- // We have two strings, a source string in haystack, haystack_len and a pattern string -- // in needle, needle_len. Find the first occurence of pattern in source or return -1. -- -- // For larger pattern and source we use a simplified Boyer Moore algorithm. -- // With a small pattern and source we use linear scan. -- -- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -- sub(result_tmp, haystack_len, needle_len); -- // needle_len < 8, use linear scan -- sub(t0, needle_len, 8); -- bltz(t0, LINEARSEARCH); -- // needle_len >= 256, use linear scan -- sub(t0, needle_len, 256); -- bgez(t0, LINEARSTUB); -- // needle_len >= haystack_len/4, use linear scan -- srli(t0, haystack_len, 2); -- bge(needle_len, t0, LINEARSTUB); -- -- // Boyer-Moore-Horspool introduction: -- // The Boyer Moore alogorithm is based on the description here:- -- // -- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -- // -- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -- // and the 'Good Suffix' rule. 
-- // -- // These rules are essentially heuristics for how far we can shift the -- // pattern along the search string. -- // -- // The implementation here uses the 'Bad Character' rule only because of the -- // complexity of initialisation for the 'Good Suffix' rule. -- // -- // This is also known as the Boyer-Moore-Horspool algorithm: -- // -- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -- // -- // #define ASIZE 256 -- // -- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -- // int i, j; -- // unsigned c; -- // unsigned char bc[ASIZE]; -- // -- // /* Preprocessing */ -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- // -- // /* Searching */ -- // j = 0; -- // while (j <= n - m) { -- // c = src[i+j]; -- // if (pattern[m-1] == c) -- // int k; -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // if (k < 0) return j; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -- // // LL case: (c< 256) always true. Remove branch -- // j += bc[pattern[j+m-1]]; -- // #endif -- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -- // // UU case: need if (c if not. -- // if (c < ASIZE) -- // j += bc[pattern[j+m-1]]; -- // else -- // j += m -- // #endif -- // } -- // return -1; -- // } -- -- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -- -- Register haystack_end = haystack_len; -- Register skipch = tmp2; -- -- // pattern length is >=8, so, we can read at least 1 register for cases when -- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -- // UL case. We'll re-read last character in inner pre-loop code to have -- // single outer pre-loop load -- const int firstStep = isLL ? 
7 : 3; -- -- const int ASIZE = 256; -- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -- -- sub(sp, sp, ASIZE); -- -- // init BC offset table with default value: needle_len -- slli(t0, needle_len, 8); -- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -- slli(tmp1, t0, 16); -- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -- slli(tmp1, t0, 32); -- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -- -- mv(ch1, sp); // ch1 is t0 -- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -- -- bind(BM_INIT_LOOP); -- // for (i = 0; i < ASIZE; ++i) -- // bc[i] = m; -- for (int i = 0; i < 4; i++) { -- sd(tmp5, Address(ch1, i * wordSize)); -- } -- add(ch1, ch1, 32); -- sub(tmp6, tmp6, 4); -- bgtz(tmp6, BM_INIT_LOOP); -- -- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -- Register orig_haystack = tmp5; -- mv(orig_haystack, haystack); -- // result_tmp = tmp4 -- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -- mv(tmp3, needle); -- -- // for (i = 0; i < m - 1; ) { -- // c = pattern[i]; -- // ++i; -- // // c < 256 for Latin1 string, so, no need for branch -- // #ifdef PATTERN_STRING_IS_LATIN1 -- // bc[c] = m - i; -- // #else -- // if (c < ASIZE) bc[c] = m - i; -- // #endif -- // } -- bind(BCLOOP); -- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -- add(tmp3, tmp3, needle_chr_size); -- if (!needle_isL) { -- // ae == StrIntrinsicNode::UU -- mv(tmp6, ASIZE); -- bgeu(ch1, tmp6, BCSKIP); -- } -- add(tmp4, sp, ch1); -- sb(ch2, Address(tmp4)); // store skip offset to BC offset table -- -- bind(BCSKIP); -- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -- bgtz(ch2, BCLOOP); -- -- // tmp6: pattern end, address after needle -- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -- if (needle_isL == haystack_isL) { -- // load last 8 bytes (8LL/4UU symbols) -- ld(tmp6, Address(tmp6, -wordSize)); -- } else { -- // UL: from UTF-16(source) search Latin1(pattern) -- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -- // We'll have to wait until load completed, but it's still faster than per-character loads+checks -- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -- slli(ch2, tmp6, XLEN - 24); -- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -- slli(ch1, tmp6, XLEN - 16); -- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -- slli(ch2, ch2, 16); -- orr(ch2, ch2, ch1); // 0x00000b0c -- slli(result, tmp3, 48); // use result as temp register -- orr(tmp6, tmp6, result); // 0x0a00000d -- slli(result, ch2, 16); -- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -- } -- -- // i = m - 1; -- // skipch = j + i; -- // if (skipch == pattern[m - 1] -- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -- // else -- // move j with bad char offset table -- bind(BMLOOPSTR2); -- // compare pattern to source string backward -- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -- (this->*haystack_load_1chr)(skipch, Address(result), noreg); -- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -- if (needle_isL == haystack_isL) { -- // re-init tmp3. It's for free because it's executed in parallel with -- // load above. 
Alternative is to initialize it before loop, but it'll -- // affect performance on in-order systems with 2 or more ld/st pipelines -- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -- } -- if (!isLL) { // UU/UL case -- slli(ch2, nlen_tmp, 1); // offsets in bytes -- } -- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -- add(result, haystack, isLL ? nlen_tmp : ch2); -- ld(ch2, Address(result)); // load 8 bytes from source string -- mv(ch1, tmp6); -- if (isLL) { -- j(BMLOOPSTR1_AFTER_LOAD); -- } else { -- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -- j(BMLOOPSTR1_CMP); -- } -- -- bind(BMLOOPSTR1); -- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- -- bind(BMLOOPSTR1_AFTER_LOAD); -- sub(nlen_tmp, nlen_tmp, 1); -- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -- -- bind(BMLOOPSTR1_CMP); -- beq(ch1, ch2, BMLOOPSTR1); -- -- bind(BMSKIP); -- if (!isLL) { -- // if we've met UTF symbol while searching Latin1 pattern, then we can -- // skip needle_len symbols -- if (needle_isL != haystack_isL) { -- mv(result_tmp, needle_len); -- } else { -- mv(result_tmp, 1); -- } -- mv(t0, ASIZE); -- bgeu(skipch, t0, BMADV); -- } -- add(result_tmp, sp, skipch); -- lbu(result_tmp, Address(result_tmp)); // load skip offset -- -- bind(BMADV); -- sub(nlen_tmp, needle_len, 1); -- // move haystack after bad char skip offset -- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -- ble(haystack, haystack_end, BMLOOPSTR2); -- add(sp, sp, ASIZE); -- j(NOMATCH); -- -- bind(BMLOOPSTR1_LASTCMP); -- bne(ch1, ch2, BMSKIP); -- -- bind(BMMATCH); -- sub(result, haystack, orig_haystack); -- if (!haystack_isL) { -- srli(result, result, 1); -- } -- add(sp, sp, ASIZE); -- j(DONE); -- -- bind(LINEARSTUB); -- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -- bltz(t0, LINEARSEARCH); -- mv(result, zr); -- RuntimeAddress stub = NULL; -- if (isLL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -- } else if (needle_isL) { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -- } else { -- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -- } -- trampoline_call(stub); -- j(DONE); -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(LINEARSEARCH); -- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -- -- bind(DONE); -- BLOCK_COMMENT("} string_indexof"); --} -- --// string_indexof --// result: x10 --// src: x11 --// src_count: x12 --// pattern: x13 --// pattern_count: x14 or 1/2/3/4 --void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae) --{ -- // Note: -- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -- -- Register ch1 = t0; -- Register ch2 = t1; -- Register hlen_neg = haystack_len, nlen_neg = needle_len; -- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -- -- bool isLL = ae == StrIntrinsicNode::LL; -- -- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -- int needle_chr_shift = needle_isL ? 0 : 1; -- int haystack_chr_shift = haystack_isL ? 0 : 1; -- int needle_chr_size = needle_isL ? 1 : 2; -- int haystack_chr_size = haystack_isL ? 1 : 2; -- -- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -- (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -- -- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -- -- Register first = tmp3; -- -- if (needle_con_cnt == -1) { -- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -- -- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -- bltz(t0, DOSHORT); -- -- (this->*needle_load_1chr)(first, Address(needle), noreg); -- slli(t0, needle_len, needle_chr_shift); -- add(needle, needle, t0); -- neg(nlen_neg, t0); -- slli(t0, result_tmp, haystack_chr_shift); -- add(haystack, haystack, t0); -- neg(hlen_neg, t0); -- -- bind(FIRST_LOOP); -- add(t0, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(nlen_tmp, nlen_neg, needle_chr_size); -- add(hlen_tmp, hlen_neg, haystack_chr_size); -- bgez(nlen_tmp, MATCH); -- -- bind(STR1_NEXT); -- add(ch1, needle, nlen_tmp); -- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- add(nlen_tmp, nlen_tmp, needle_chr_size); -- add(hlen_tmp, hlen_tmp, haystack_chr_size); -- bltz(nlen_tmp, STR1_NEXT); -- j(MATCH); -- -- bind(DOSHORT); -- if (needle_isL == haystack_isL) { -- sub(t0, needle_len, 2); -- bltz(t0, DO1); -- bgtz(t0, DO3); -- } -- } -- -- if (needle_con_cnt == 4) { -- Label CH1_LOOP; -- (this->*load_4chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 4); -- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_4chr)(ch2, Address(ch2), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -- Label CH1_LOOP; -- BLOCK_COMMENT("string_indexof DO2 {"); -- bind(DO2); -- (this->*load_2chr)(ch1, Address(needle), noreg); -- if (needle_con_cnt == 2) { -- sub(result_tmp, haystack_len, 2); -- } -- slli(tmp3, result_tmp, haystack_chr_shift); -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(CH1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, 
MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, CH1_LOOP); -- j(NOMATCH); -- BLOCK_COMMENT("} string_indexof DO2"); -- } -- -- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -- BLOCK_COMMENT("string_indexof DO3 {"); -- -- bind(DO3); -- (this->*load_2chr)(first, Address(needle), noreg); -- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -- if (needle_con_cnt == 3) { -- sub(result_tmp, haystack_len, 3); -- } -- slli(hlen_tmp, result_tmp, haystack_chr_shift); -- add(haystack, haystack, hlen_tmp); -- neg(hlen_neg, hlen_tmp); -- -- bind(FIRST_LOOP); -- add(ch2, haystack, hlen_neg); -- (this->*load_2chr)(ch2, Address(ch2), noreg); -- beq(first, ch2, STR1_LOOP); -- -- bind(STR2_NEXT); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, FIRST_LOOP); -- j(NOMATCH); -- -- bind(STR1_LOOP); -- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -- add(ch2, haystack, hlen_tmp); -- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -- bne(ch1, ch2, STR2_NEXT); -- j(MATCH); -- BLOCK_COMMENT("} string_indexof DO3"); -- } -- -- if (needle_con_cnt == -1 || needle_con_cnt == 1) { -- Label DO1_LOOP; -- -- BLOCK_COMMENT("string_indexof DO1 {"); -- bind(DO1); -- (this->*needle_load_1chr)(ch1, Address(needle), noreg); -- sub(result_tmp, haystack_len, 1); -- mv(tmp3, result_tmp); -- if (haystack_chr_shift) { -- slli(tmp3, result_tmp, haystack_chr_shift); -- } -- add(haystack, haystack, tmp3); -- neg(hlen_neg, tmp3); -- -- bind(DO1_LOOP); -- add(tmp3, haystack, hlen_neg); -- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -- beq(ch1, ch2, MATCH); -- add(hlen_neg, hlen_neg, haystack_chr_size); -- blez(hlen_neg, DO1_LOOP); -- BLOCK_COMMENT("} string_indexof DO1"); -- } -- -- bind(NOMATCH); -- mv(result, -1); -- j(DONE); -- -- bind(MATCH); -- srai(t0, hlen_neg, haystack_chr_shift); -- add(result, result_tmp, t0); -- -- bind(DONE); --} -- --// Compare strings. --void C2_MacroAssembler::string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -- Register tmp3, int ae) --{ -- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -- SHORT_LOOP_START, TAIL_CHECK, L; -- -- const int STUB_THRESHOLD = 64 + 8; -- bool isLL = ae == StrIntrinsicNode::LL; -- bool isLU = ae == StrIntrinsicNode::LU; -- bool isUL = ae == StrIntrinsicNode::UL; -- -- bool str1_isL = isLL || isLU; -- bool str2_isL = isLL || isUL; -- -- // for L strings, 1 byte for 1 character -- // for U strings, 2 bytes for 1 character -- int str1_chr_size = str1_isL ? 1 : 2; -- int str2_chr_size = str2_isL ? 1 : 2; -- int minCharsInWord = isLL ? wordSize : wordSize / 2; -- -- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -- -- BLOCK_COMMENT("string_compare {"); -- -- // Bizzarely, the counts are passed in bytes, regardless of whether they -- // are L or U strings, however the result is always in characters. -- if (!str1_isL) { -- sraiw(cnt1, cnt1, 1); -- } -- if (!str2_isL) { -- sraiw(cnt2, cnt2, 1); -- } -- -- // Compute the minimum of the string lengths and save the difference in result. 
-- sub(result, cnt1, cnt2); -- bgt(cnt1, cnt2, L); -- mv(cnt2, cnt1); -- bind(L); -- -- // A very short string -- li(t0, minCharsInWord); -- ble(cnt2, t0, SHORT_STRING); -- -- // Compare longwords -- // load first parts of strings and finish initialization while loading -- { -- if (str1_isL == str2_isL) { // LL or UU -- // load 8 bytes once to compare -- ld(tmp1, Address(str1)); -- beq(str1, str2, DONE); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- sub(cnt2, cnt2, minCharsInWord); -- beqz(cnt2, TAIL_CHECK); -- // convert cnt2 from characters to bytes -- if (!str1_isL) { -- slli(cnt2, cnt2, 1); -- } -- add(str2, str2, cnt2); -- add(str1, str1, cnt2); -- sub(cnt2, zr, cnt2); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- add(str1, str1, cnt2); -- sub(cnt1, zr, cnt2); -- slli(cnt2, cnt2, 1); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 4); -- } else { // UL case -- ld(tmp1, Address(str1)); -- lwu(tmp2, Address(str2)); -- li(t0, STUB_THRESHOLD); -- bge(cnt2, t0, STUB); -- addi(cnt2, cnt2, -4); -- slli(t0, cnt2, 1); -- sub(cnt1, zr, t0); -- add(str1, str1, t0); -- add(str2, str2, cnt2); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- sub(cnt2, zr, cnt2); -- addi(cnt1, cnt1, 8); -- } -- addi(cnt2, cnt2, isUL ? 4 : 8); -- bgez(cnt2, TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- -- // main loop -- bind(NEXT_WORD); -- if (str1_isL == str2_isL) { // LL or UU -- add(t0, str1, cnt2); -- ld(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt2, cnt2, 8); -- } else if (isLU) { // LU case -- add(t0, str1, cnt1); -- lwu(tmp1, Address(t0)); -- add(t0, str2, cnt2); -- ld(tmp2, Address(t0)); -- addi(cnt1, cnt1, 4); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- addi(cnt2, cnt2, 8); -- } else { // UL case -- add(t0, str2, cnt2); -- lwu(tmp2, Address(t0)); -- add(t0, str1, cnt1); -- ld(tmp1, Address(t0)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- addi(cnt1, cnt1, 8); -- addi(cnt2, cnt2, 4); -- } -- bgez(cnt2, TAIL); -- -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, NEXT_WORD); -- j(DIFFERENCE); -- bind(TAIL); -- xorr(tmp3, tmp1, tmp2); -- bnez(tmp3, DIFFERENCE); -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- if (str1_isL == str2_isL) { // LL or UU -- ld(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- } else if (isLU) { // LU case -- lwu(tmp1, Address(str1)); -- ld(tmp2, Address(str2)); -- inflate_lo32(tmp3, tmp1); -- mv(tmp1, tmp3); -- } else { // UL case -- lwu(tmp2, Address(str2)); -- ld(tmp1, Address(str1)); -- inflate_lo32(tmp3, tmp2); -- mv(tmp2, tmp3); -- } -- bind(TAIL_CHECK); -- xorr(tmp3, tmp1, tmp2); -- beqz(tmp3, DONE); -- -- // Find the first different characters in the longwords and -- // compute their difference. 
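The DIFFERENCE block below does this with an XOR and a trailing-zero count. A plain-Java sketch of the same trick for the LL (one byte per character) case, assuming little-endian byte order within the loaded word (names hypothetical, not part of the patch):

    // Sketch of the DIFFERENCE step: XOR exposes the mismatching bits, and the
    // trailing-zero count, rounded down to a byte boundary, locates the first
    // differing character. Precondition: w1 != w2.
    class FirstDifference {
        static int firstByteDifference(long w1, long w2) {
            long x = w1 ^ w2;                                // non-zero iff the words differ
            int shift = Long.numberOfTrailingZeros(x) & ~7;  // round down to the differing byte
            int c1 = (int) (w1 >>> shift) & 0xFF;
            int c2 = (int) (w2 >>> shift) & 0xFF;
            return c1 - c2;
        }
    }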
-- bind(DIFFERENCE); -- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -- srl(tmp1, tmp1, result); -- srl(tmp2, tmp2, result); -- if (isLL) { -- andi(tmp1, tmp1, 0xFF); -- andi(tmp2, tmp2, 0xFF); -- } else { -- andi(tmp1, tmp1, 0xFFFF); -- andi(tmp2, tmp2, 0xFFFF); -- } -- sub(result, tmp1, tmp2); -- j(DONE); -- } -- -- bind(STUB); -- RuntimeAddress stub = NULL; -- switch (ae) { -- case StrIntrinsicNode::LL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -- break; -- case StrIntrinsicNode::UU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -- break; -- case StrIntrinsicNode::LU: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -- break; -- case StrIntrinsicNode::UL: -- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -- break; -- default: -- ShouldNotReachHere(); -- } -- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -- trampoline_call(stub); -- j(DONE); -- -- bind(SHORT_STRING); -- // Is the minimum length zero? -- beqz(cnt2, DONE); -- // arrange code to do most branches while loading and loading next characters -- // while comparing previous -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- j(SHORT_LOOP_START); -- bind(SHORT_LOOP); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST); -- bind(SHORT_LOOP_START); -- (this->*str1_load_chr)(tmp2, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(t0, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bne(tmp1, cnt1, SHORT_LOOP_TAIL); -- addi(cnt2, cnt2, -1); -- beqz(cnt2, SHORT_LAST2); -- (this->*str1_load_chr)(tmp1, Address(str1), t0); -- addi(str1, str1, str1_chr_size); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- beq(tmp2, t0, SHORT_LOOP); -- sub(result, tmp2, t0); -- j(DONE); -- bind(SHORT_LOOP_TAIL); -- sub(result, tmp1, cnt1); -- j(DONE); -- bind(SHORT_LAST2); -- beq(tmp2, t0, DONE); -- sub(result, tmp2, t0); -- -- j(DONE); -- bind(SHORT_LAST_INIT); -- (this->*str2_load_chr)(cnt1, Address(str2), t0); -- addi(str2, str2, str2_chr_size); -- bind(SHORT_LAST); -- beq(tmp1, cnt1, DONE); -- sub(result, tmp1, cnt1); -- -- bind(DONE); -- -- BLOCK_COMMENT("} string_compare"); --} -- --void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -- Register tmp4, Register tmp5, Register tmp6, Register result, -- Register cnt1, int elem_size) { -- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -- Register tmp1 = t0; -- Register tmp2 = t1; -- Register cnt2 = tmp2; // cnt2 only used in array length compare -- Register elem_per_word = tmp6; -- int log_elem_size = exact_log2(elem_size); -- int length_offset = arrayOopDesc::length_offset_in_bytes(); -- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); -- -- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -- li(elem_per_word, wordSize / elem_size); -- -- BLOCK_COMMENT("arrays_equals {"); -- -- // if (a1 == a2), return true -- beq(a1, a2, SAME); -- -- mv(result, false); -- beqz(a1, DONE); -- beqz(a2, DONE); -- lwu(cnt1, Address(a1, length_offset)); -- lwu(cnt2, Address(a2, length_offset)); -- bne(cnt2, cnt1, DONE); -- beqz(cnt1, SAME); -- -- slli(tmp5, cnt1, 3 + log_elem_size); -- sub(tmp5, zr, tmp5); -- add(a1, a1, base_offset); -- add(a2, a2, base_offset); -- ld(tmp3, Address(a1, 0)); -- ld(tmp4, Address(a2, 0)); -- ble(cnt1, elem_per_word, SHORT); // short or same -- -- // Main 16 byte comparison loop with 2 exits -- bind(NEXT_DWORD); { -- ld(tmp1, Address(a1, wordSize)); -- ld(tmp2, Address(a2, wordSize)); -- sub(cnt1, cnt1, 2 * wordSize / elem_size); -- blez(cnt1, TAIL); -- bne(tmp3, tmp4, DONE); -- ld(tmp3, Address(a1, 2 * wordSize)); -- ld(tmp4, Address(a2, 2 * wordSize)); -- add(a1, a1, 2 * wordSize); -- add(a2, a2, 2 * wordSize); -- ble(cnt1, elem_per_word, TAIL2); -- } beq(tmp1, tmp2, NEXT_DWORD); -- j(DONE); -- -- bind(TAIL); -- xorr(tmp4, tmp3, tmp4); -- xorr(tmp2, tmp1, tmp2); -- sll(tmp2, tmp2, tmp5); -- orr(tmp5, tmp4, tmp2); -- j(IS_TMP5_ZR); -- -- bind(TAIL2); -- bne(tmp1, tmp2, DONE); -- -- bind(SHORT); -- xorr(tmp4, tmp3, tmp4); -- sll(tmp5, tmp4, tmp5); -- -- bind(IS_TMP5_ZR); -- bnez(tmp5, DONE); -- -- bind(SAME); -- mv(result, true); -- // That's it. -- bind(DONE); -- -- BLOCK_COMMENT("} array_equals"); --} -- --// Compare Strings -- --// For Strings we're passed the address of the first characters in a1 --// and a2 and the length in cnt1. --// elem_size is the element size in bytes: either 1 or 2. --// There are two implementations. For arrays >= 8 bytes, all --// comparisons (including the final one, which may overlap) are --// performed 8 bytes at a time. For strings < 8 bytes, we compare a --// halfword, then a short, and then a byte. -- --void C2_MacroAssembler::string_equals(Register a1, Register a2, -- Register result, Register cnt1, int elem_size) --{ -- Label SAME, DONE, SHORT, NEXT_WORD; -- Register tmp1 = t0; -- Register tmp2 = t1; -- -- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -- assert_different_registers(a1, a2, result, cnt1, t0, t1); -- -- BLOCK_COMMENT("string_equals {"); -- -- mv(result, false); -- -- // Check for short strings, i.e. smaller than wordSize. -- sub(cnt1, cnt1, wordSize); -- bltz(cnt1, SHORT); -- -- // Main 8 byte comparison loop. -- bind(NEXT_WORD); { -- ld(tmp1, Address(a1, 0)); -- add(a1, a1, wordSize); -- ld(tmp2, Address(a2, 0)); -- add(a2, a2, wordSize); -- sub(cnt1, cnt1, wordSize); -- bne(tmp1, tmp2, DONE); -- } bgtz(cnt1, NEXT_WORD); -- -- // Last longword. In the case where length == 4 we compare the -- // same longword twice, but that's still faster than another -- // conditional branch. -- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -- // length == 4. -- add(tmp1, a1, cnt1); -- ld(tmp1, Address(tmp1, 0)); -- add(tmp2, a2, cnt1); -- ld(tmp2, Address(tmp2, 0)); -- bne(tmp1, tmp2, DONE); -- j(SAME); -- -- bind(SHORT); -- Label TAIL03, TAIL01; -- -- // 0-7 bytes left. -- andi(t0, cnt1, 4); -- beqz(t0, TAIL03); -- { -- lwu(tmp1, Address(a1, 0)); -- add(a1, a1, 4); -- lwu(tmp2, Address(a2, 0)); -- add(a2, a2, 4); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL03); -- // 0-3 bytes left. 
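The tail code that follows selects the remaining compares from the low bits of the residual count, so at most three extra loads are needed after the 8-byte loop. A hypothetical Java model of that decomposition, written for byte elements (for char data the final 1-byte step is skipped, as in the assembly):

    class StringEqualsTail {
        // remaining is 0..7 bytes left after the word-sized loop.
        static boolean tailEquals(byte[] a, byte[] b, int from, int remaining) {
            int i = from;
            if ((remaining & 4) != 0) {                      // 4-byte step
                for (int k = 0; k < 4; k++, i++) {
                    if (a[i] != b[i]) return false;
                }
            }
            if ((remaining & 2) != 0) {                      // 2-byte step
                for (int k = 0; k < 2; k++, i++) {
                    if (a[i] != b[i]) return false;
                }
            }
            return (remaining & 1) == 0 || a[i] == b[i];     // optional final byte
        }
    }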
-- andi(t0, cnt1, 2); -- beqz(t0, TAIL01); -- { -- lhu(tmp1, Address(a1, 0)); -- add(a1, a1, 2); -- lhu(tmp2, Address(a2, 0)); -- add(a2, a2, 2); -- bne(tmp1, tmp2, DONE); -- } -- -- bind(TAIL01); -- if (elem_size == 1) { // Only needed when comparing 1-byte elements -- // 0-1 bytes left. -- andi(t0, cnt1, 1); -- beqz(t0, SAME); -- { -- lbu(tmp1, a1, 0); -- lbu(tmp2, a2, 0); -- bne(tmp1, tmp2, DONE); -- } -- } -- -- // Arrays are equal. -- bind(SAME); -- mv(result, true); -- -- // That's it. -- bind(DONE); -- BLOCK_COMMENT("} string_equals"); --} -- --typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); --typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -- bool is_far, bool is_unordered); -- --static conditional_branch_insn conditional_branches[] = --{ -- /* SHORT branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgt, -- NULL, // BoolTest::overflow -- (conditional_branch_insn)&Assembler::blt, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::ble, -- NULL, // BoolTest::no_overflow -- (conditional_branch_insn)&Assembler::bge, -- -- /* UNSIGNED branches */ -- (conditional_branch_insn)&Assembler::beq, -- (conditional_branch_insn)&Assembler::bgtu, -- NULL, -- (conditional_branch_insn)&Assembler::bltu, -- (conditional_branch_insn)&Assembler::bne, -- (conditional_branch_insn)&Assembler::bleu, -- NULL, -- (conditional_branch_insn)&Assembler::bgeu --}; -- --static float_conditional_branch_insn float_conditional_branches[] = --{ -- /* FLOAT SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::float_beq, -- (float_conditional_branch_insn)&MacroAssembler::float_bgt, -- NULL, // BoolTest::overflow -- (float_conditional_branch_insn)&MacroAssembler::float_blt, -- (float_conditional_branch_insn)&MacroAssembler::float_bne, -- (float_conditional_branch_insn)&MacroAssembler::float_ble, -- NULL, // BoolTest::no_overflow -- (float_conditional_branch_insn)&MacroAssembler::float_bge, -- -- /* DOUBLE SHORT branches */ -- (float_conditional_branch_insn)&MacroAssembler::double_beq, -- (float_conditional_branch_insn)&MacroAssembler::double_bgt, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_blt, -- (float_conditional_branch_insn)&MacroAssembler::double_bne, -- (float_conditional_branch_insn)&MacroAssembler::double_ble, -- NULL, -- (float_conditional_branch_insn)&MacroAssembler::double_bge --}; -- --void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -- "invalid conditional branch index"); -- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); --} -- --// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use --// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
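Put differently, the is_unordered flag passed by float_cmp_branch below depends only on the BoolTest condition. A small Java sketch of that rule; the enum order follows the branch tables above, everything else is hypothetical:

    class FloatBranchRule {
        // Order mirrors the branch tables above:
        // eq, gt, overflow, lt, ne, le, no_overflow, ge.
        enum BoolTest { EQ, GT, OVERFLOW, LT, NE, LE, NO_OVERFLOW, GE }

        // A NaN operand ("unordered") must make ge/gt branches fall through
        // and every other supported branch be taken.
        static boolean branchOnUnordered(BoolTest test) {
            return test != BoolTest.GE && test != BoolTest.GT;
        }
    }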
--void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -- "invalid float conditional branch index"); -- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); -- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); --} -- --void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- case BoolTest::le: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- case BoolTest::gt: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -- switch (cmpFlag) { -- case BoolTest::eq: -- beqz(op1, L, is_far); -- break; -- case BoolTest::ne: -- bnez(op1, L, is_far); -- break; -- default: -- ShouldNotReachHere(); -- } --} -- --void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -- Label L; -- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -- mv(dst, src); -- bind(L); --} -- --// Set dst to NaN if any NaN input. --void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min) { -- assert_different_registers(dst, src1, src2); -- -- Label Done; -- fsflags(zr); -- if (is_double) { -- is_min ? fmin_d(dst, src1, src2) -- : fmax_d(dst, src1, src2); -- // Checking NaNs -- flt_d(zr, src1, src2); -- } else { -- is_min ? fmin_s(dst, src1, src2) -- : fmax_s(dst, src1, src2); -- // Checking NaNs -- flt_s(zr, src1, src2); -- } -- -- frflags(t0); -- beqz(t0, Done); -- -- // In case of NaNs -- is_double ? fadd_d(dst, src1, src2) -- : fadd_s(dst, src1, src2); -- -- bind(Done); --} -diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -deleted file mode 100644 -index 90b6554af02..00000000000 ---- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp -+++ /dev/null -@@ -1,141 +0,0 @@ --/* -- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP --#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -- --// C2_MacroAssembler contains high-level macros for C2 -- -- public: -- -- void string_compare(Register str1, Register str2, -- Register cnt1, Register cnt2, Register result, -- Register tmp1, Register tmp2, Register tmp3, -- int ae); -- -- void string_indexof_char_short(Register str1, Register cnt1, -- Register ch, Register result, -- bool isL); -- -- void string_indexof_char(Register str1, Register cnt1, -- Register ch, Register result, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- bool isL); -- -- void string_indexof(Register str1, Register str2, -- Register cnt1, Register cnt2, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, int ae); -- -- void string_indexof_linearscan(Register haystack, Register needle, -- Register haystack_len, Register needle_len, -- Register tmp1, Register tmp2, -- Register tmp3, Register tmp4, -- int needle_con_cnt, Register result, int ae); -- -- void arrays_equals(Register r1, Register r2, -- Register tmp3, Register tmp4, -- Register tmp5, Register tmp6, -- Register result, Register cnt1, -- int elem_size); -- -- void string_equals(Register r1, Register r2, -- Register result, Register cnt1, -- int elem_size); -- -- // refer to conditional_branches and float_conditional_branches -- static const int bool_test_bits = 3; -- static const int neg_cond_bits = 2; -- static const int unsigned_branch_mask = 1 << bool_test_bits; -- static const int double_branch_mask = 1 << bool_test_bits; -- -- // cmp -- void cmp_branch(int cmpFlag, -- Register op1, Register op2, -- Label& label, bool is_far = false); -- -- void float_cmp_branch(int cmpFlag, -- FloatRegister op1, FloatRegister op2, -- Label& label, bool is_far = false); -- -- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -- Label& L, bool is_far = false); -- -- void enc_cmove(int cmpFlag, -- Register op1, Register op2, -- Register dst, Register src); -- -- void spill(Register r, bool is64, int offset) { -- is64 ? sd(r, Address(sp, offset)) -- : sw(r, Address(sp, offset)); -- } -- -- void spill(FloatRegister f, bool is64, int offset) { -- is64 ? fsd(f, Address(sp, offset)) -- : fsw(f, Address(sp, offset)); -- } -- -- void spill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vs1r_v(v, t0); -- } -- -- void unspill(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lw(r, Address(sp, offset)); -- } -- -- void unspillu(Register r, bool is64, int offset) { -- is64 ? ld(r, Address(sp, offset)) -- : lwu(r, Address(sp, offset)); -- } -- -- void unspill(FloatRegister f, bool is64, int offset) { -- is64 ? 
fld(f, Address(sp, offset)) -- : flw(f, Address(sp, offset)); -- } -- -- void unspill(VectorRegister v, int offset) { -- add(t0, sp, offset); -- vl1r_v(v, t0); -- } -- -- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { -- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -- unspill(v0, src_offset); -- spill(v0, dst_offset); -- } -- -- void minmax_FD(FloatRegister dst, -- FloatRegister src1, FloatRegister src2, -- bool is_double, bool is_min); -- --#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 304b6f2b06c..d175a62aeeb 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { - ifence(); - } - -+#ifdef COMPILER2 -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; -+ -+ BLOCK_COMMENT("string_indexof_char_short {"); -+ -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -+ -+ mv(result, -1); -+ mv(index, zr); -+ -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); -+ -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); -+ -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++package sun.jvm.hotspot.debugger.riscv64; + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++import java.lang.annotation.Native; + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++/** Specifies the thread context on riscv64 platforms; only a sub-portion ++ * of the context is guaranteed to be present on all operating ++ * systems. 
*/ + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++public abstract class RISCV64ThreadContext implements ThreadContext { ++ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++ // /* ++ // * Signal context structure - contains all info to do with the state ++ // * before the signal handler was invoked. ++ // */ ++ // struct sigcontext { ++ // struct user_regs_struct sc_regs; ++ // union __riscv_fp_state sc_fpregs; ++ // }; ++ // ++ // struct user_regs_struct { ++ // unsigned long pc; ++ // unsigned long ra; ++ // unsigned long sp; ++ // unsigned long gp; ++ // unsigned long tp; ++ // unsigned long t0; ++ // unsigned long t1; ++ // unsigned long t2; ++ // unsigned long s0; ++ // unsigned long s1; ++ // unsigned long a0; ++ // unsigned long a1; ++ // unsigned long a2; ++ // unsigned long a3; ++ // unsigned long a4; ++ // unsigned long a5; ++ // unsigned long a6; ++ // unsigned long a7; ++ // unsigned long s2; ++ // unsigned long s3; ++ // unsigned long s4; ++ // unsigned long s5; ++ // unsigned long s6; ++ // unsigned long s7; ++ // unsigned long s8; ++ // unsigned long s9; ++ // unsigned long s10; ++ // unsigned long s11; ++ // unsigned long t3; ++ // unsigned long t4; ++ // unsigned long t5; ++ // unsigned long t6; ++ // }; + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work) + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
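As a hypothetical usage sketch (not part of the patch): a debugger back-end is expected to fill in all 32 registers in user_regs_struct order, after which the stack walker reads the few it needs through the symbolic indices declared next. The regsFromPtrace array below is an assumed input:

    import sun.jvm.hotspot.debugger.riscv64.RISCV64ThreadContext;

    class ContextUsageSketch {
        // regsFromPtrace: assumed 32-entry array in user_regs_struct order.
        static void seed(RISCV64ThreadContext ctx, long[] regsFromPtrace) {
            for (int i = 0; i < ctx.getNumRegisters(); i++) {
                ctx.setRegister(i, regsFromPtrace[i]);
            }
            long pc = ctx.getRegister(RISCV64ThreadContext.PC); // index 0
            long sp = ctx.getRegister(RISCV64ThreadContext.SP); // index 2
            long fp = ctx.getRegister(RISCV64ThreadContext.FP); // index 8 (s0)
            System.out.printf("pc=0x%x sp=0x%x fp=0x%x%n", pc, sp, fp);
        }
    }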
++ @Native ++ public static final int R0 = 0; ++ public static final int R1 = 1; ++ public static final int R2 = 2; ++ public static final int R3 = 3; ++ public static final int R4 = 4; ++ public static final int R5 = 5; ++ public static final int R6 = 6; ++ public static final int R7 = 7; ++ public static final int R8 = 8; ++ public static final int R9 = 9; ++ public static final int R10 = 10; ++ public static final int R11 = 11; ++ public static final int R12 = 12; ++ public static final int R13 = 13; ++ public static final int R14 = 14; ++ public static final int R15 = 15; ++ public static final int R16 = 16; ++ public static final int R17 = 17; ++ public static final int R18 = 18; ++ public static final int R19 = 19; ++ public static final int R20 = 20; ++ public static final int R21 = 21; ++ public static final int R22 = 22; ++ public static final int R23 = 23; ++ public static final int R24 = 24; ++ public static final int R25 = 25; ++ public static final int R26 = 26; ++ public static final int R27 = 27; ++ public static final int R28 = 28; ++ public static final int R29 = 29; ++ public static final int R30 = 30; ++ public static final int R31 = 31; + -+ bind(MATCH7); -+ addi(index, index, 7); ++ public static final int NPRGREG = 32; + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++ public static final int PC = R0; ++ public static final int LR = R1; ++ public static final int SP = R2; ++ public static final int FP = R8; + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++ private long[] data; + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++ public RISCV64ThreadContext() { ++ data = new long[NPRGREG]; ++ } + -+ addi(t0, cnt1, isL ? 
-32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++ public int getNumRegisters() { ++ return NPRGREG; ++ } + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); ++ public String getRegisterName(int index) { ++ switch (index) { ++ case LR: return "lr"; ++ case SP: return "sp"; ++ case PC: return "pc"; ++ default: ++ return "r" + index; ++ } + } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++ public long getRegister(int index) { ++ return data[index]; ++ } + -+ uint64_t mask0101 = UCONST64(0x0101010101010101); -+ uint64_t mask0001 = UCONST64(0x0001000100010001); -+ mv(mask1, isL ? mask0101 : mask0001); -+ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); -+ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); -+ mv(mask2, isL ? mask7f7f : mask7fff); ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a..89d676fe3b 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -99,6 +100,8 @@ public class Threads { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("riscv64")) { ++ access = new LinuxRISCV64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..5c2b6e0e3e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++package sun.jvm.hotspot.runtime.linux_riscv64; + -+ bind(NOMATCH); -+ mv(result, -1); ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} ++public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; + -+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } + -+ BLOCK_COMMENT("string_indexof {"); ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } + -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } + -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. 
Find the first occurence of pattern in source or return -1. ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } + -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new RISCV64Frame(thread.getLastJavaSP(), fp); ++ } + -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new RISCV64RegisterMap(thread, updateMap); ++ } + -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. 
-+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } + -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ } + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ } + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 7 : 3; ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..34701c6922 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +@@ -0,0 +1,223 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ sub(sp, sp, ASIZE); ++package sun.jvm.hotspot.runtime.riscv64; + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++/**

Should be able to be used on all riscv64 platforms we support ++ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" ++ functionality. Input is an RISCV64ThreadContext; output is SP, FP, ++ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is ++ left to the caller, since we may need to subclass RISCV64Frame to ++ support signal handler frames on Unix platforms.

+ -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++

Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated FP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at.

*/ + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++public class RISCV64CurrentFrameGuess { ++ private RISCV64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") ++ != null; ++ ++ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; + } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. 
eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } ++ setValues(null, null, null); // Assume we're not going to find anything + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable FP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from SP. + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. 
In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new RISCV64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ } + } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved SP and ++ // FP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. 
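The fp-recovery code that follows relies on that saved-pair layout. As plain arithmetic (an illustrative sketch, with word size and frame size passed in rather than taken from the VM):

    class FrameSlotArithmetic {
        // With a known compiled-frame size, the saved fp sits two words below
        // the top of the frame and the return address one word below it.
        static long savedFpSlot(long sp, long frameSizeInBytes, long wordSize) {
            return sp + frameSizeInBytes - 2 * wordSize;   // same as link_offset below
        }
        static long returnAddressSlot(long sp, long frameSizeInBytes, long wordSize) {
            return sp + frameSizeInBytes - wordSize;
        }
    }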
++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. ++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ return true; ++ } ++ } + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct RISCV64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } +} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +new file mode 100644 +index 0000000000..e372bc5f7b +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -0,0 +1,554 @@ ++/* ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++package sun.jvm.hotspot.runtime.riscv64; + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; + -+ bool isLL = ae == StrIntrinsicNode::LL; ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the riscv64 family of CPUs. */ + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++public class RISCV64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; ++ } + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? 
(load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ // Java frames ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_PADDING_OFFSET; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + -+ Register first = tmp3; ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++ private static VMReg fp = new VMReg(8); + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ } + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, 
haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++ private RISCV64Frame() { ++ } + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } + } + } + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp, pc): " + this); ++ dumpStack(); + } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); + } + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ public RISCV64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++ // We cannot assume SP[-1] always contains a valid return PC (e.g. if ++ // the callee is a C/C++ compiled frame). If the PC is not known to ++ // Java then this.pc is null. 
++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); + -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); ++ adjustUnextendedSP(); + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp): " + this); ++ dumpStack(); ++ } + } + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); + } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); + -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); + } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); -+} -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ public Object clone() { ++ RISCV64Frame frame = new RISCV64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; ++ if (!(arg instanceof RISCV64Frame)) { ++ return false; ++ } + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ RISCV64Frame other = (RISCV64Frame) arg; + -+ load_chr_insn str1_load_chr = str1_isL ? 
(load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } + -+ BLOCK_COMMENT("string_compare {"); ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); ++ return raw_sp.hashCode(); + } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? "null" : pc.toString()); + } + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } + -+ // A very short string -+ li(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ // FIXME: not implemented yet ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ beq(str1, str2, DONE); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if (!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ li(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } -+ addi(cnt2, cnt2, isUL ? 
4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; + } -+ bgez(cnt2, TAIL); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ lwu(tmp2, Address(str2)); -+ ld(tmp1, Address(str1)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; + } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); + -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; + } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); + -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } + -+ bind(DONE); ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } + -+ BLOCK_COMMENT("} string_compare"); -+} ++ return true; ++ } + -+void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; + -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ li(elem_per_word, wordSize / elem_size); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } + -+ BLOCK_COMMENT("arrays_equals {"); ++ // Default is we done have to follow them. 
The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); + -+ // if (a1 == a2), return true -+ beq(a1, a2, SAME); ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); + -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } + -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); ++ private Frame senderForEntryFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ RISCV64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } + -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // If we are returning to a compiled MethodHandle call site, the ++ // saved_fp will in fact be a saved value of the unextended SP. The ++ // simplest way to tell whether we are returning to such a call site ++ // is as follows: + -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original ++ // PC. For MethodHandle call site the unextended_sp is stored in ++ // saved_fp. 
++ if (senderNm.isDeoptMhEntry(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ else if (senderNm.isDeoptEntry(getPC())) { ++ } ++ else if (senderNm.isMethodHandleReturn(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ } ++ } + -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); ++ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. + -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + -+ BLOCK_COMMENT("} array_equals"); -+} ++ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } + -+// Compare Strings ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } + -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. ++ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } + -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // + -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } + -+ BLOCK_COMMENT("string_equals {"); ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + -+ mv(result, false); ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ bltz(cnt1, SHORT); ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + -+ // Main 8 byte comparison loop. 
-+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); + -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } + -+ bind(SHORT); -+ Label TAIL03, TAIL01; ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } + -+ // 0-7 bytes left. -+ andi(t0, cnt1, 4); -+ beqz(t0, TAIL03); -+ { -+ lwu(tmp1, Address(a1, 0)); -+ add(a1, a1, 4); -+ lwu(tmp2, Address(a2, 0)); -+ add(a2, a2, 4); -+ bne(tmp1, tmp2, DONE); ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + -+ bind(TAIL03); -+ // 0-3 bytes left. -+ andi(t0, cnt1, 2); -+ beqz(t0, TAIL01); -+ { -+ lhu(tmp1, Address(a1, 0)); -+ add(a1, a1, 2); -+ lhu(tmp2, Address(a2, 0)); -+ add(a2, a2, 2); -+ bne(tmp1, tmp2, DONE); ++ protected boolean hasSenderPD() { ++ return true; + } + -+ bind(TAIL01); -+ if (elem_size == 1) { // Only needed when comparing 1-byte elements -+ // 0-1 bytes left. -+ andi(t0, cnt1, 1); -+ beqz(t0, SAME); -+ { -+ lbu(tmp1, a1, 0); -+ lbu(tmp2, a2, 0); -+ bne(tmp1, tmp2, DONE); -+ } ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); -+ -+ // That's it. 
-+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } + -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; ++ public Address getUnextendedSP() { return raw_unextendedSP; } + -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } + -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
-+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); -+} ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } + -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } -+} + -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); + } -+} + -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } + -+// Set dst to NaN if any NaN input. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) + -+ Label Done; -+ fsflags(zr); -+ if (is_double) { -+ is_min ? fmin_d(dst, src1, src2) -+ : fmax_d(dst, src1, src2); -+ // Checking NaNs -+ flt_d(zr, src1, src2); -+ } else { -+ is_min ? fmin_s(dst, src1, src2) -+ : fmax_s(dst, src1, src2); -+ // Checking NaNs -+ flt_s(zr, src1, src2); ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + -+ frflags(t0); -+ beqz(t0, Done); ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } + -+ // In case of NaNs -+ is_double ? 
fadd_d(dst, src1, src2) -+ : fadd_s(dst, src1, src2); ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } + -+ bind(Done); -+} ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } + -+#endif // COMPILER2 ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } + -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index c6b71bdbc3c..2ef28771e2e 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { - void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); - void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); - -+public: -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, -+ int ae); ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } + -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } + -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } + -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } + -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } + -+ void arrays_equals(Register r1, Register r2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, Register cnt1, -+ int elem_size); ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } + -+ void string_equals(Register r1, Register r2, -+ Register result, Register cnt1, -+ int elem_size); ++ protected 
Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * ++ VM.getVM().getAddressSize()); ++ } + -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 0000000000..850758a7ed +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ + -+ // cmp -+ void cmp_branch(int cmpFlag, -+ Register op1, Register op2, -+ Label& label, bool is_far = false); ++package sun.jvm.hotspot.runtime.riscv64; + -+ void float_cmp_branch(int cmpFlag, -+ FloatRegister op1, FloatRegister op2, -+ Label& label, bool is_far = false); ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; + -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; + -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, -+ Label& L, bool is_far = false); ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } + -+ void enc_cmove(int cmpFlag, -+ Register op1, Register op2, -+ Register dst, Register src); ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); + -+ void spill(Register r, bool is64, int offset) { -+ is64 ? sd(r, Address(sp, offset)) -+ : sw(r, Address(sp, offset)); ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + -+ void spill(FloatRegister f, bool is64, int offset) { -+ is64 ? fsd(f, Address(sp, offset)) -+ : fsw(f, Address(sp, offset)); ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); + } + -+ void spill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(v, t0); ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 0000000000..4aeb1c6f55 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+ void unspill(Register r, bool is64, int offset) { -+ is64 ? 
ld(r, Address(sp, offset)) -+ : lw(r, Address(sp, offset)); -+ } ++package sun.jvm.hotspot.runtime.riscv64; + -+ void unspillu(Register r, bool is64, int offset) { -+ is64 ? ld(r, Address(sp, offset)) -+ : lwu(r, Address(sp, offset)); -+ } ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; + -+ void unspill(FloatRegister f, bool is64, int offset) { -+ is64 ? fld(f, Address(sp, offset)) -+ : flw(f, Address(sp, offset)); -+ } ++public class RISCV64RegisterMap extends RegisterMap { + -+ void unspill(VectorRegister v, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(v, t0); ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); + } + -+ void minmax_FD(FloatRegister dst, -+ FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min); ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } + - }; - - #ifdef ASSERT -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 13546ab328b..2e7eed8fb52 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ebreak(); - } -@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - #endif - - void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. - for (int i = 0; i < _count; i++) { - __ nop(); -@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // n.b. frame size includes space for return pc and fp - const int framesize = C->output()->frame_size_in_bytes(); -@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - assert_cond(ra_ != NULL); - Compile* C = ra_->C; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); - int framesize = C->output()->frame_size_in_bytes(); - -@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - int dst_offset = ra_->reg2offset(dst_lo); - - if (cbuf != NULL) { -- C2_MacroAssembler _masm(cbuf); -+ MacroAssembler _masm(cbuf); - Assembler::CompressibleRegion cr(&_masm); - switch (src_lo_rc) { - case rc_int: -@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - #endif - - void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - assert_cond(ra_ != NULL); - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); -@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const - { - // This is the unverified entry point. 
-- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - Label skip; - __ cmp_klass(j_rarg0, t1, t0, skip); -@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) - // j #exception_blob_entry_point - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_exception_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) - { - // Note that the code buffer's insts_mark is always relative to insts. - // That's why we must use the macroassembler to generate a handler. -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - address base = __ start_a_stub(size_deopt_handler()); - if (base == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); -@@ -1848,7 +1848,7 @@ encode %{ - // BEGIN Non-volatile memory access - - enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - int64_t con = (int64_t)$src$$constant; - Register dst_reg = as_Register($dst$$reg); -@@ -1856,7 +1856,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL || con == (address)1) { -@@ -1875,7 +1875,7 @@ encode %{ - %} - - enc_class riscv_enc_mov_p1(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register dst_reg = as_Register($dst$$reg); - __ li(dst_reg, 1); -@@ -1893,12 +1893,12 @@ encode %{ - %} - - enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ load_byte_map_base($dst$$Register); - %} - - enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1911,13 +1911,13 @@ encode %{ - %} - - enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - __ mv(dst_reg, zr); - %} - - enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - address con = (address)$src$$constant; - if (con == NULL) { -@@ -1930,42 +1930,42 @@ encode %{ - %} - - enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as 
bool*/ true); - %} - - enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); - %} - - enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, - /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, - /*result as bool*/ true); -@@ -1974,13 +1974,13 @@ encode %{ - // compare and branch instruction encodings - - enc_class riscv_enc_j(label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - __ j(*L); - %} - - enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Label* L = $lbl$$label; - switch ($cmp$$cmpcode) { - case(BoolTest::ge): -@@ -2004,7 +2004,7 @@ encode %{ - - Label miss; - Label done; -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, - NULL, &miss); - if ($primary) { -@@ -2023,7 +2023,7 @@ encode %{ - %} - - enc_class riscv_enc_java_static_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - address addr = (address)$meth$$method; - address call = NULL; -@@ -2055,7 +2055,7 @@ encode %{ - %} - - enc_class riscv_enc_java_dynamic_call(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - int method_index = resolved_method_index(cbuf); - address call = __ ic_call((address)$meth$$method, method_index); - if (call == NULL) { -@@ -2065,7 +2065,7 @@ encode %{ - %} - - enc_class riscv_enc_call_epilog() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - if (VerifyStackAtCalls) { - // Check that stack depth is unchanged: find majik cookie on stack - __ call_Unimplemented(); -@@ -2073,7 +2073,7 @@ encode %{ - %} - - enc_class riscv_enc_java_to_runtime(method meth) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - - // some calls to generated routines (arraycopy code) are scheduled - // by C2 as runtime calls. if so we can call them using a jr (they -@@ -2102,7 +2102,7 @@ encode %{ - - // using the cr register as the bool result: 0 for success; others failed. 
- enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2189,7 +2189,7 @@ encode %{ - - // using cr flag to indicate the fast_unlock result: 0 for success; others failed. - enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register flag = t1; - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); -@@ -2262,7 +2262,7 @@ encode %{ - // arithmetic encodings - - enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2270,7 +2270,7 @@ encode %{ - %} - - enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2278,7 +2278,7 @@ encode %{ - %} - - enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2286,7 +2286,7 @@ encode %{ - %} - - enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Register dst_reg = as_Register($dst$$reg); - Register src1_reg = as_Register($src1$$reg); - Register src2_reg = as_Register($src2$$reg); -@@ -2294,14 +2294,14 @@ encode %{ - %} - - enc_class riscv_enc_tail_call(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - __ jr(target_reg); - %} - - enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - Register target_reg = as_Register($jump_target$$reg); - // exception oop should be in x10 -@@ -2312,12 +2312,12 @@ encode %{ - %} - - enc_class riscv_enc_rethrow() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); - %} - - enc_class riscv_enc_ret() %{ -- C2_MacroAssembler _masm(&cbuf); -+ MacroAssembler _masm(&cbuf); - Assembler::CompressibleRegion cr(&_masm); - __ ret(); - %} -@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, 
as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label)); - %} - -@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label)); - %} - -@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode 
| C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) - format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} - - ins_encode %{ -- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), - as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} - - ins_encode %{ -- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), - as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); - %} - -@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} -@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) - %} - - ins_encode %{ -- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, - as_Register($op1$$reg), as_Register($op2$$reg), - as_Register($dst$$reg), as_Register($src$$reg)); - %} - -From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:15:18 +0800 -Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the - same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY - routines - ---- - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -index d93530d8564..776b0787238 100644 ---- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -278,12 +278,12 @@ class SlowSignatureHandler - }; - - --JRT_ENTRY(address, -- InterpreterRuntime::slow_signature_handler(JavaThread* current, -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, - Method* method, - intptr_t* from, - intptr_t* to)) -- methodHandle m(current, (Method*)method); -+ methodHandle m(thread, (Method*)method); - assert(m->is_native(), "sanity check"); - - // handle arguments -@@ -292,4 +292,4 @@ JRT_ENTRY(address, - - // return result handler - return Interpreter::result_handler(m->result_type()); --JRT_END -+IRT_END - -From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:20:06 +0800 -Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in - JavaClasses - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- - .../templateInterpreterGenerator_riscv.cpp | 2 +- - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp 
b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index 4442b5991b1..e070b8096a6 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ - verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), - "MH argument is a Class"); - } -- __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); - } - - #ifdef ASSERT -@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, - - // Load the invoker, as MH -> MH.form -> LF.vmentry - __ verify_oop(recv); -- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); - __ verify_oop(method_temp); -- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); - - if (VerifyMethodHandles && !for_compiler_entry) { - // make sure recv is already on stack -@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - "MemberName required for invokeVirtual etc."); - } - -- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); -- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); -- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); -- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); - - Register temp1_recv_klass = temp1; - if (iid != vmIntrinsics::_linkToStatic) { -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index 8aea4eca048..ce6166030b4 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { - - address entry = __ pc(); - -- const int referent_offset = java_lang_ref_Reference::referent_offset(); -+ const int referent_offset = 
java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - Label slow_path; - -From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:30:35 +0800 -Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated - java.util.Observer and Observable - ---- - .../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- - .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- - .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- - 3 files changed, 6 deletions(-) - -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -index f2e224f28ee..5c2b6e0e3ea 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.riscv64.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { - private static AddressField lastJavaFPField; -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -index df280005d72..e372bc5f7be 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -34,8 +34,6 @@ - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - /** Specialization of and implementation of abstract methods of the - Frame class for the riscv64 family of CPUs. 
*/ -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -index d0ad2b559a6..850758a7ed4 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -31,8 +31,6 @@ - import sun.jvm.hotspot.types.*; - import sun.jvm.hotspot.runtime.*; - import sun.jvm.hotspot.utilities.*; --import sun.jvm.hotspot.utilities.Observable; --import sun.jvm.hotspot.utilities.Observer; - - public class RISCV64JavaCallWrapper extends JavaCallWrapper { - private static AddressField lastJavaFPField; - -From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:33:56 +0800 -Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes - to be used on Linux - ---- - src/hotspot/os/linux/os_linux.cpp | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 6f75e623a9a..7fc9588301b 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); - -From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:38:53 +0800 -Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 - accelerator/intrinsic - ---- - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index d4b79162d84..50ee7edb708 100644 ---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -82,11 +82,6 @@ void VM_Version::initialize() { - FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); - } - -- if (UseSHA3Intrinsics) { -- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); -- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); -- } -- - if (UseCRC32Intrinsics) { - warning("CRC32 intrinsics are not available on this CPU."); - FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); - -From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:43:23 +0800 -Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out - of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- - src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- - 3 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d175a62aeeb..d94074b4a3c 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { - // was post-decremented.) 
Skip this address by starting at i=1, and - // touch a few more pages below. N.B. It is important to touch all - // the way down to and including i=StackShadowPages. -- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { - // this could be any sized move but this is can be a debugging crumb - // so the bigger the better. - sub(tmp, tmp, os::vm_page_size()); -diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -index ae414224c5b..dc3ac548d73 100644 ---- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - __ nop(); - - // Generate stack overflow check -- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); -+ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); - - // Generate a new frame for the wrapper. - __ enter(); -@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - Label reguard; - Label reguard_done; - __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); - __ beq(t0, t1, reguard); - __ bind(reguard_done); - -diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -index ce6166030b4..e639fa7e12f 100644 ---- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - { - Label no_reguard; - __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); -+ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); - __ bne(t0, t1, no_reguard); - - __ pusha(); // only save smashed registers - -From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:50:51 +0800 -Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from - globals.hpp - ---- - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -index 1f4409a9c9a..84b1afc7dc6 100644 ---- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp -+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -28,7 +28,6 @@ - #include "asm/macroAssembler.inline.hpp" - #include "gc/shared/barrierSetAssembler.hpp" - #include "gc/shared/collectedHeap.hpp" --#include "gc/shared/tlab_globals.hpp" - #include "interpreter/interp_masm.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - -From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 16:56:58 +0800 -Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to - CompressedOops class - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- - .../cpu/riscv/macroAssembler_riscv.hpp | 1 - - src/hotspot/cpu/riscv/riscv.ad | 10 +-- - 3 
files changed, 37 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index d94074b4a3c..becc1656358 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops) { - if (Universe::is_fully_initialized()) { -- mv(xheapbase, CompressedOops::ptrs_base()); -+ mv(xheapbase, Universe::narrow_ptrs_base()); - } else { - int32_t offset = 0; -- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); - ld(xheapbase, Address(xheapbase, offset)); - } - } -@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R - void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { - if (UseCompressedClassPointers) { - lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -- if (CompressedKlassPointers::base() == NULL) { -- slli(tmp, tmp, CompressedKlassPointers::shift()); -+ if (Universe::narrow_klass_base() == NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); - beq(trial_klass, tmp, L); - return; - } -@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, - // Algorithm must match CompressedOops::encode. - void MacroAssembler::encode_heap_oop(Register d, Register s) { - verify_oop(s, "broken oop in encode_heap_oop"); -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - srli(d, s, LogMinObjAlignmentInBytes); - } else { - mv(d, s); -@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { - bgez(d, notNull); - mv(d, zr); - bind(notNull); -- if (CompressedOops::shift() != 0) { -- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -- srli(d, d, CompressedOops::shift()); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); - } - } - } -@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { - void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - slli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (uintptr_t)CompressedKlassPointers::base()); -+ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); - -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert_different_registers(t0, xbase); - shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); - } else { -@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { - void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { - assert(UseCompressedClassPointers, "should only be used for compressed headers"); - -- if (CompressedKlassPointers::base() == NULL) { -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, src, LogKlassAlignmentInBytes); - } else { - mv(dst, src); -@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - return; - } - -- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && -- CompressedKlassPointers::shift() == 0) { -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { - zero_extend(dst, src, 32); - return; - } -@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register - } - - assert_different_registers(src, xbase); -- li(xbase, (intptr_t)CompressedKlassPointers::base()); -+ li(xbase, (intptr_t)Universe::narrow_klass_base()); - sub(dst, src, xbase); -- if (CompressedKlassPointers::shift() != 0) { -- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - srli(dst, dst, LogKlassAlignmentInBytes); - } - if (xbase == xheapbase) { -@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. 
-- if (CompressedOops::shift() != 0) { -- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - slli(dst, src, LogMinObjAlignmentInBytes); -- if (CompressedOops::base() != NULL) { -+ if (Universe::narrow_oop_base() != NULL) { - add(dst, xheapbase, dst); - } - } else { -- assert(CompressedOops::base() == NULL, "sanity"); -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); - mv(dst, src); - } - } - - void MacroAssembler::decode_heap_oop(Register d, Register s) { -- if (CompressedOops::base() == NULL) { -- if (CompressedOops::shift() != 0 || d != s) { -- slli(d, s, CompressedOops::shift()); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); - } - } else { - Label done; -@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - InstructionMark im(this); - RelocationHolder rspec = metadata_Relocation::spec(index); - code_section()->relocate(inst_mark(), rspec); -- narrowKlass nk = CompressedKlassPointers::encode(k); -+ narrowKlass nk = Klass::encode_klass(k); - li32(dst, nk); - zero_extend(dst, dst, 32); - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 2ef28771e2e..953bca3cbd8 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -29,7 +29,6 @@ - - #include "asm/assembler.hpp" - #include "metaprogramming/enableIf.hpp" --#include "oops/compressedOops.hpp" - - // MacroAssembler extends Assembler by frequently used macros. - // -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 2e7eed8fb52..24214964243 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const - st->print_cr("# MachUEPNode"); - if (UseCompressedClassPointers) { - st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -- if (CompressedKlassPointers::shift() != 0) { -+ if (Universe::narrow_klass_shift() != 0) { - st->print_cr("\tdecode_klass_not_null t0, t0"); - } - } else { -@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) - - operand indirectN(iRegN reg) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(DecodeN reg); - op_cost(0); -@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) - - operand indOffIN(iRegN reg, immIOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) - - operand indOffLN(iRegN reg, immLOffset off) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeN reg) off); - op_cost(0); -@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ - // in case of 32bit oops (heap < 4Gb). 
- instruct convN2I(iRegINoSp dst, iRegN src) - %{ -- predicate(CompressedOops::shift() == 0); -+ predicate(Universe::narrow_oop_shift() == 0); - match(Set dst (ConvL2I (CastP2X (DecodeN src)))); - - ins_cost(ALU_COST); - -From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:11:20 +0800 -Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index becc1656358..e2841c28c37 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { - // instruction. - if (NativeInstruction::is_li32_at(insn_addr)) { - // Move narrow OOP -- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); -+ narrowOop n = CompressedOops::encode((oop)o); - return patch_imm_in_li32(insn_addr, (int32_t)n); - } else if (NativeInstruction::is_movptr_at(insn_addr)) { - // Move wide OOP - -From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:20:05 +0800 -Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from - systemDictionary.hpp to vmClasses.hpp - ---- - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- - 2 files changed, 7 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -index e070b8096a6..fd907f77afb 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -27,7 +27,6 @@ - #include "precompiled.hpp" - #include "asm/macroAssembler.hpp" - #include "classfile/javaClasses.inline.hpp" --#include "classfile/vmClasses.hpp" - #include "interpreter/interpreter.hpp" - #include "interpreter/interpreterRuntime.hpp" - #include "memory/allocation.inline.hpp" -@@ -50,7 +49,7 @@ - void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { - assert_cond(_masm != NULL); - if (VerifyMethodHandles) { -- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), - "MH argument is a Class"); - } - __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); -@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { - - #ifdef ASSERT - void MethodHandles::verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message) { - assert_cond(_masm != NULL); -- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); -- Klass* klass = vmClasses::klass_at(klass_id); -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); - Register temp = t1; - Register temp2 = t0; // used by MacroAssembler::cmpptr - Label L_ok, L_bad; -@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, - // The method is a member invoker used by direct method handles. 
- if (VerifyMethodHandles) { - // make sure the trailing argument really is a MemberName (caller responsibility) -- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), - "MemberName required for invokeVirtual etc."); - } - -diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -index f73aba29d67..65493eba764 100644 ---- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { - static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); - - static void verify_klass(MacroAssembler* _masm, -- Register obj, vmClassID klass_id, -+ Register obj, SystemDictionary::WKID klass_id, - const char* error_message = "wrong klass") NOT_DEBUG_RETURN; - - static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), - "reference is a MH"); - } - - -From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:32:15 +0800 -Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure - automatically by the number of available registers for allocation - ---- - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- - 2 files changed, 2 insertions(+), 27 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 6c301cdae04..33d78fb2f6f 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); - - define_pd_global(intx, OnStackReplacePercentage, 140); - define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 32); - define_pd_global(intx, FreqInlineSize, 325); - define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); - define_pd_global(intx, InteriorEntryAlignment, 16); - define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); - define_pd_global(intx, LoopUnrollLimit, 60); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 24214964243..c5e0ae23029 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) - return can_be_java_arg(reg); - } - --uint Matcher::int_pressure_limit() --{ -- // A derived pointer is live at CallNode and then is flagged by RA -- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip -- // derived pointers and lastly fail to spill after reaching maximum -- // number of iterations. Lowering the default pressure threshold to -- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become -- // a high register pressure area of the code so that split_DEF can -- // generate DefinitionSpillCopy for the derived pointer. 
-- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; -- if (!PreserveFramePointer) { -- // When PreserveFramePointer is off, frame pointer is allocatable, -- // but different from other SOC registers, it is excluded from -- // fatproj's mask because its save type is No-Save. Decrease 1 to -- // ensure high pressure at fatproj when PreserveFramePointer is off. -- // See check_pressure_at_fatproj(). -- default_int_pressure_threshold--; -- } -- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; --} -- --uint Matcher::float_pressure_limit() --{ -- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. -- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; --} -- - bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { - return false; - } - -From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 17:51:12 +0800 -Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class - Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the - register part) - ---- - .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - - src/hotspot/cpu/riscv/register_riscv.cpp | 4 - - src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- - 3 files changed, 91 insertions(+), 38 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 2936837d951..ffd420da024 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false - --#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp -index 96cf1996a83..ef60cb3bb05 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.cpp -+++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -26,10 +26,6 @@ - #include "precompiled.hpp" - #include "register_riscv.hpp" - --REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); --REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * - RegisterImpl::max_slots_per_register; - -diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp -index d697751f55f..f64a06eb89f 100644 ---- a/src/hotspot/cpu/riscv/register_riscv.hpp -+++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; - - // Use Register as shortcut - class RegisterImpl; --typedef const RegisterImpl* Register; -+typedef RegisterImpl* Register; - --inline constexpr Register as_Register(int encoding); -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} - - class RegisterImpl: public AbstractRegisterImpl { -- static constexpr Register first(); -- - public: - enum { - number_of_registers = 32, -@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { - }; - - // derived registers, offsets, and addresses -- const Register successor() const { return this + 1; } -+ const Register successor() const { return 
as_Register(encoding() + 1); } - - // construction -- inline friend constexpr Register as_Register(int encoding); -+ inline friend Register as_Register(int encoding); - - VMReg as_VMReg() const; - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { - return encoding_nocheck() >= compressed_register_base && - encoding_nocheck() <= compressed_register_top; - } --}; - --REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } -+}; - - // The integer registers of the RISCV architecture - -@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); - - // Use FloatRegister as shortcut - class FloatRegisterImpl; --typedef const FloatRegisterImpl* FloatRegister; -+typedef FloatRegisterImpl* FloatRegister; - --inline constexpr FloatRegister as_FloatRegister(int encoding); -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } +} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..6552ce255f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public class PlatformInfo { - // The implementation of floating point registers for the architecture - class FloatRegisterImpl: public AbstractRegisterImpl { -- static constexpr FloatRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr FloatRegister as_FloatRegister(int encoding); -+ inline friend FloatRegister as_FloatRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- FloatRegister successor() const { -- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); -- } -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - -@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { - } - }; + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; --REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); -- - // The float registers of the RISCV architecture + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp b/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp +index 24f25b87af..7a3845e336 100644 +--- a/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp ++++ b/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -24,10 +24,13 @@ + #include "precompiled.hpp" + #include "gc/shared/memset_with_concurrent_readers.hpp" + #include "utilities/globalDefinitions.hpp" +-#include "unittest.hpp" + ++#include "utilities/vmassert_uninstall.hpp" + #include + #include ++#include "utilities/vmassert_reinstall.hpp" ++ ++#include "unittest.hpp" + + static unsigned line_byte(const char* line, size_t i) { + return unsigned(line[i]) & 0xFF; +diff --git a/test/hotspot/gtest/jfr/test_networkUtilization.cpp b/test/hotspot/gtest/jfr/test_networkUtilization.cpp +index 19d6a6e2c2..42cd18356b 100644 +--- a/test/hotspot/gtest/jfr/test_networkUtilization.cpp ++++ b/test/hotspot/gtest/jfr/test_networkUtilization.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -42,11 +42,13 @@ + #include "utilities/globalDefinitions.hpp" + #include "utilities/growableArray.hpp" + +-#include "unittest.hpp" +- ++#include "utilities/vmassert_uninstall.hpp" + #include + #include + #include ++#include "utilities/vmassert_reinstall.hpp" ++ ++#include "unittest.hpp" + + namespace { + +diff --git a/test/hotspot/gtest/unittest.hpp b/test/hotspot/gtest/unittest.hpp +index 0494a0e240..91edf6adba 100644 +--- a/test/hotspot/gtest/unittest.hpp ++++ b/test/hotspot/gtest/unittest.hpp +@@ -28,19 +28,10 @@ + #include + + #define GTEST_DONT_DEFINE_TEST 1 +-#include "gtest/gtest.h" + +-// gtest/gtest.h includes assert.h which will define the assert macro, but hotspot has its +-// own standards incompatible assert macro that takes two parameters. +-// The workaround is to undef assert and then re-define it. The re-definition +-// must unfortunately be copied since debug.hpp might already have been +-// included and a second include wouldn't work due to the header guards in debug.hpp. +-#ifdef assert +- #undef assert +- #ifdef vmassert +- #define assert(p, ...) vmassert(p, __VA_ARGS__) +- #endif +-#endif ++#include "utilities/vmassert_uninstall.hpp" ++#include "gtest/gtest.h" ++#include "utilities/vmassert_reinstall.hpp" - CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); -@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + #define CONCAT(a, b) a ## b - // Use VectorRegister as shortcut - class VectorRegisterImpl; --typedef const VectorRegisterImpl* VectorRegister; -+typedef VectorRegisterImpl* VectorRegister; +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c28..823b9f39db 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools; + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +54,8 @@ public class TestBit { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; --inline constexpr VectorRegister as_VectorRegister(int encoding); -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/calls/TestManyArgs.java b/test/hotspot/jtreg/compiler/calls/TestManyArgs.java +new file mode 100644 +index 0000000000..fbd9c13d7c +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/calls/TestManyArgs.java +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2024, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* @test ++ * @summary Pass values on stack. ++ * @requires os.arch == "riscv64" ++ * @run main/native compiler.calls.TestManyArgs ++ */ ++ ++package compiler.calls; ++ ++public class TestManyArgs { ++ static { ++ System.loadLibrary("TestManyArgs"); ++ } ++ ++ native static void scramblestack(); ++ ++ native static int checkargs(int arg0, short arg1, byte arg2, ++ int arg3, short arg4, byte arg5, ++ int arg6, short arg7, byte arg8, ++ int arg9, short arg10, byte arg11); ++ ++ static int compiledbridge(int arg0, short arg1, byte arg2, ++ int arg3, short arg4, byte arg5, ++ int arg6, short arg7, byte arg8, ++ int arg9, short arg10, byte arg11) { ++ return checkargs(arg0, arg1, arg2, arg3, arg4, arg5, ++ arg6, arg7, arg8, arg9, arg10, arg11); ++ } ++ ++ static public void main(String[] args) { ++ scramblestack(); ++ for (int i = 0; i < 20000; i++) { ++ int res = compiledbridge((int)0xf, (short)0xf, (byte)0xf, ++ (int)0xf, (short)0xf, (byte)0xf, ++ (int)0xf, (short)0xf, (byte)0xf, ++ (int)0xf, (short)0xf, (byte)0xf); ++ if (res != 0) { ++ throw new RuntimeException("Test failed"); ++ } ++ } ++ } +} - - // The implementation of vector registers for RVV - class VectorRegisterImpl: public AbstractRegisterImpl { -- static constexpr VectorRegister first(); -- - public: - enum { - number_of_registers = 32, -@@ -242,23 +240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { - }; - - // construction -- inline friend constexpr VectorRegister as_VectorRegister(int encoding); -+ inline friend VectorRegister as_VectorRegister(int encoding); - - VMReg as_VMReg() const; - - // derived registers, offsets, and addresses -- VectorRegister successor() const { return this + 1; } -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } - - // accessors - int encoding() const { assert(is_valid(), "invalid 
register"); return encoding_nocheck(); } -- int encoding_nocheck() const { return this - first(); } -+ int encoding_nocheck() const { return (intptr_t)this; } - bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } - const char* name() const; - - }; - --REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); -- - // The vector registers of RVV - CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); - -@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { - static const int max_fpr; - }; - --typedef AbstractRegSet RegSet; --typedef AbstractRegSet FloatRegSet; --typedef AbstractRegSet VectorRegSet; -+// A set of registers -+class RegSet { -+ uint32_t _bitset; +diff --git a/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c b/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c +new file mode 100644 +index 0000000000..8836c79e43 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2024, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ + -+ RegSet(uint32_t bitset) : _bitset(bitset) { } ++#include "jni.h" + -+public: ++#ifdef riscv64 ++/* RV64 ABI pass all integers as 64-bit, in registers or on stack ++ * As compiler may choose to load smaller width than 64-bit if passed on stack, ++ * this test may not find any bugs. ++ * Therefore we trick the compiler todo 64-bit loads, ++ * by saying these args are jlongs. 
++ */ ++JNIEXPORT jint JNICALL Java_compiler_calls_TestManyArgs_checkargs(JNIEnv* env, jclass jclazz, ++ jlong arg0, jlong arg1, jlong arg2, ++ jlong arg3, jlong arg4, jlong arg5, ++ jlong arg6, jlong arg7, jlong arg8, ++ jlong arg9, jlong arg10, jlong arg11) ++#else ++JNIEXPORT jint JNICALL Java_compiler_calls_TestManyArgs_checkargs(JNIEnv* env, jclass jclazz, ++ jint arg0, jshort arg1, jbyte arg2, ++ jint arg3, jshort arg4, jbyte arg5, ++ jint arg6, jshort arg7, jbyte arg8, ++ jint arg9, jshort arg10, jbyte arg11) ++#endif ++{ ++ if (arg0 != 0xf) return 1; ++ if (arg1 != 0xf) return 1; ++ if (arg2 != 0xf) return 1; ++ if (arg3 != 0xf) return 1; ++ if (arg4 != 0xf) return 1; ++ if (arg5 != 0xf) return 1; ++ if (arg6 != 0xf) return 1; ++ if (arg7 != 0xf) return 1; ++ if (arg8 != 0xf) return 1; ++ if (arg9 != 0xf) return 1; ++ if (arg10 != 0xf) return 1; ++ if (arg11 != 0xf) return 1; ++ return 0; ++} + -+ RegSet() : _bitset(0) { } ++JNIEXPORT ++void JNICALL Java_compiler_calls_TestManyArgs_scramblestack(JNIEnv* env, jclass jclazz) ++{ ++ volatile char stack[12*8]; ++ for (unsigned int i = 0; i < sizeof(stack); i++) { ++ stack[i] = (char)0xff; ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +new file mode 100644 +index 0000000000..5a1b659bbe +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ + -+ RegSet(Register r1) : _bitset(r1->bit()) { } ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics ++ */ + -+ RegSet operator+(const RegSet aSet) const { -+ RegSet result(_bitset | aSet._bitset); -+ return result; -+ } ++package compiler.floatingpoint; + -+ RegSet operator-(const RegSet aSet) const { -+ RegSet result(_bitset & ~aSet._bitset); -+ return result; -+ } ++import compiler.whitebox.CompilerWhiteBoxTest; ++import sun.hotspot.WhiteBox; + -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; -+ } ++import java.lang.reflect.Method; + -+ RegSet &operator-=(const RegSet aSet) { -+ *this = *this - aSet; -+ return *this; -+ } ++public class TestLibmIntrinsics { + -+ static RegSet of(Register r1) { -+ return RegSet(r1); -+ } ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; -+ } ++ private static final double pi = 3.1415926; + -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; -+ } ++ private static final double expected = 2.5355263553695413; + -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; -+ } ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ } + -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= 31 - end->encoding(); -+ bits >>= 31 - end->encoding(); ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); + -+ return RegSet(bits); -+ } ++ double interpreter_result = m(); + -+ uint32_t bits() const { return _bitset; } ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); + -+private: ++ double c1_result = m(); + -+ Register first() { -+ uint32_t first = _bitset & -_bitset; -+ return first ? 
as_Register(exact_log2(first)) : noreg; -+ } -+}; - - #endif // CPU_RISCV_REGISTER_RISCV_HPP - -From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:24:49 +0800 -Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its - own Phase - ---- - .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- - 2 files changed, 11 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index e2841c28c37..656334f326b 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { - CompileTask* task = ciEnv::current()->task(); - in_scratch_emit_size = - (task != NULL && is_c2_compile(task->comp_level()) && -- Compile::current()->output()->in_scratch_emit_size()); -+ Compile::current()->in_scratch_emit_size()); - #endif - if (!in_scratch_emit_size) { - address stub = emit_trampoline_stub(offset(), entry.target()); -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c5e0ae23029..d736750d02d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { - //============================================================================= - const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; - --int ConstantTable::calculate_table_base_offset() const { -+int Compile::ConstantTable::calculate_table_base_offset() const { - return 0; // absolute addressing, no offset - } - -@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - -- int framesize = C->output()->frame_slots() << LogBytesPerInt; -+ int framesize = C->frame_slots() << LogBytesPerInt; - -- if (C->output()->need_stack_bang(framesize)) { -+ if (C->need_stack_bang(framesize)) { - st->print("# stack bang size=%d\n\t", framesize); - } - -@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - MacroAssembler _masm(&cbuf); - - // n.b. frame size includes space for return pc and fp -- const int framesize = C->output()->frame_size_in_bytes(); -+ const int framesize = C->frame_size_in_bytes(); - - // insert a nop at the start of the prolog so we can patch in a - // branch if we need to invalidate the method later -@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - - assert_cond(C != NULL); - -- int bangsize = C->output()->bang_size_in_bytes(); -- if (C->output()->need_stack_bang(bangsize)) { -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize)) { - __ generate_stack_overflow_check(bangsize); - } - -@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Unimplemented(); - } - -- C->output()->set_frame_complete(cbuf.insts_size()); -+ C->set_frame_complete(cbuf.insts_size()); - - if (C->has_mach_constant_base_node()) { - // NOTE: We set the table base offset here because users might be - // emitted before MachConstantBaseNode. 
-- ConstantTable& constant_table = C->output()->constant_table(); -+ Compile::ConstantTable& constant_table = C->constant_table(); - constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); - } - } -@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { - assert_cond(st != NULL && ra_ != NULL); - Compile* C = ra_->C; - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - st->print("# pop frame %d\n\t", framesize); - -@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - Compile* C = ra_->C; - MacroAssembler _masm(&cbuf); - assert_cond(C != NULL); -- int framesize = C->output()->frame_size_in_bytes(); -+ int framesize = C->frame_size_in_bytes(); - - __ remove_frame(framesize); - - -From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:36:09 +0800 -Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 2 - - src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- - 2 files changed, 78 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index d736750d02d..1e6495692da 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { - case Op_PopCountL: - return UsePopCountInstruction; - -- case Op_RotateRight: -- case Op_RotateLeft: - case Op_CountLeadingZerosI: - case Op_CountLeadingZerosL: - case Op_CountTrailingZerosI: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -index 4488c1c4031..b9e04c432e1 100644 ---- a/src/hotspot/cpu/riscv/riscv_b.ad -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -25,82 +25,6 @@ - - // RISCV Bit-Manipulation Extension Architecture Description File - --instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} -- -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); -- %} -- -- ins_pipe(ialu_reg_shift); --%} -- --instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateRight src shift)); -- -- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ -- predicate(UseRVB); -- 
match(Set dst (RotateLeft src shift)); -- -- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- --instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ -- predicate(UseRVB); -- match(Set dst (RotateLeft src shift)); -- -- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} -- ins_cost(ALU_COST); -- ins_encode %{ -- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -- %} -- ins_pipe(ialu_reg_reg); --%} -- - // Convert oop into int for vectors alignment masking - instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ - predicate(UseRVB); - -From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:49:28 +0800 -Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to - expand on the MachNode level - ---- - src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- - 1 file changed, 4 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 1e6495692da..533eaf843e3 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) - instruct loadP(iRegPNoSp dst, memory mem) - %{ - match(Set dst (LoadP mem)); -- predicate(n->as_Load()->barrier_data() == 0); - - ins_cost(LOAD_COST); - format %{ "ld $dst, $mem\t# ptr, #@loadP" %} -@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS - - instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); -- - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); -@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL - - instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndSwapP mem (Binary oldval newval))); - -@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN - - instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (CompareAndExchangeP mem (Binary oldval newval))); - -@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne - - instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - - ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); 
-@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN - - instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); - -@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(n->as_LoadStore()->barrier_data() == 0); - match(Set prev (GetAndSetP mem newv)); - - ins_cost(ALU_COST); -@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) - - instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) - %{ -- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); -+ predicate(needs_acquiring_load_reserved(n)); - - match(Set prev (GetAndSetP mem newv)); - - -From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:51:09 +0800 -Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence - ---- - src/hotspot/cpu/riscv/riscv.ad | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 533eaf843e3..5fa3b85c001 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7537,7 +7537,6 @@ instruct membar_release() %{ - - instruct membar_storestore() %{ - match(MemBarStoreStore); -- match(StoreStoreFence); - ins_cost(ALU_COST); - - format %{ "MEMBAR-store-store\t#@membar_storestore" %} - -From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 19:54:02 +0800 -Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long - indexes and ranges && JDK-8252372: Check if cloning is required to move loads - out of loops in PhaseIdealLoop::split_if_with_blocks_post() - ---- - src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- - 1 file changed, 33 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 5fa3b85c001..388e65f623d 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castLL(iRegL dst) --%{ -- match(Set dst (CastLL dst)); -- -- size(0); -- format %{ "# castLL of $dst, #@castLL" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - instruct castII(iRegI dst) - %{ - match(Set dst (CastII dst)); -@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) - ins_pipe(pipe_class_empty); - %} - --instruct castFF(fRegF dst) --%{ -- match(Set dst (CastFF dst)); -- -- size(0); -- format %{ "# castFF of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- --instruct castDD(fRegD dst) --%{ -- match(Set dst (CastDD dst)); -- -- size(0); -- format %{ "# castDD of $dst" %} -- ins_encode(/* empty encoding */); -- ins_cost(0); -- ins_pipe(pipe_class_empty); --%} -- - // ============================================================================ - // Convert Instructions - - -From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:02:14 +0800 
-Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] - Insufficient error handling when CodeBuffer is exhausted - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- - 2 files changed, 6 insertions(+), 10 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 656334f326b..37ccf132986 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) - if (StubRoutines::riscv::complete()) { - address tpc = trampoline_call(zero_blocks); - if (tpc == NULL) { -- DEBUG_ONLY(reset_labels(around)); -+ DEBUG_ONLY(reset_labels1(around)); - postcond(pc() == badAddress); - return NULL; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 953bca3cbd8..45ffc663963 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { - private: - - #ifdef ASSERT -- // Template short-hand support to clean-up after a failed call to trampoline -+ // Macro short-hand support to clean-up after a failed call to trampoline - // call generation (see trampoline_call() below), when a set of Labels must - // be reset (before returning). -- template -- void reset_labels(Label& lbl, More&... more) { -- lbl.reset(); reset_labels(more...); -- } -- template -- void reset_labels(Label& lbl) { -- lbl.reset(); -- } -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) - #endif - void repne_scan(Register addr, Register value, Register count, Register tmp); ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f0..55374b116e 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - -From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:30:49 +0800 -Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific - node cloning in Matcher - ---- - src/hotspot/cpu/riscv/riscv.ad | 12 +----------- - 1 file changed, 1 insertion(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 388e65f623d..7cd6c2995ba 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { +@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a9..8fb82ee453 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; - const bool Matcher::convi2l_type_required = false; +@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309da..aca32137ed 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; --// Should the Matcher clone input 'm' of node 'n'? --bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { -- assert_cond(m != NULL); -- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) -- mstack.push(m, Visit); // m = ShiftCntV -- return true; -- } -- return false; --} -- - // Should the Matcher clone shifts on addressing modes, expecting them - // to be subsumed into complex addressing expressions or compute them - // into registers? 
--bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { - return clone_base_plus_offset_address(m, mstack, address_visited); - } +@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366ba..8deac4f789 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; - -From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:40:00 +0800 -Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from - product_pd to product, defaulting to "true" - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 8f2f4e0e81d..25e00bea901 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -34,6 +34,8 @@ +@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae6..2663500204 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,5 @@ + /* +- * 
Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; - #ifndef TIERED - define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); - define_pd_global(bool, InlineIntrinsics, true ); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - define_pd_global(bool, ProfileTraps, false); -diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -index 33d78fb2f6f..3da1f1c6d86 100644 ---- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -33,6 +33,8 @@ - // (see c2_globals.hpp). Alpha-sorted. + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } - define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); - define_pd_global(bool, CICompileOSR, true); - define_pd_global(bool, InlineIntrinsics, true); - define_pd_global(bool, PreferInterpreterNativeStubs, false); - -From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:49:57 +0800 -Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any - functionalities - ---- - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp - -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 00000000000..e9ff307b647 +index 0000000000..8566d57c39 --- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,115 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -80560,1887 +57267,1285 @@ index 00000000000..e9ff307b647 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. -+ * + */ + -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++package compiler.intrinsics.sha.cli.testcases; + -+// Nothing to do on riscv ++import compiler.intrinsics.sha.cli.SHAOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; + -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP - -From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 20:58:34 +0800 -Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation - code to platform-specific code - ---- - src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ - 1 file changed, 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index 7cd6c2995ba..fc6823daf8b 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -740,13 +740,6 @@ class HandlerImpl { - } - }; - --class Node::PD { --public: -- enum NodeFlags { -- _last_flag = Node::_last_flag -- }; --}; -- - bool is_CAS(int opcode, bool maybe_volatile); - - // predicate controlling translation of CompareAndSwapX -@@ -805,17 +798,6 @@ void reg_mask_init() { - } - } - --void PhaseOutput::pd_perform_mach_node_analysis() { --} -- --int MachNode::pd_alignment_required() const { -- return 1; --} -- --int MachNode::compute_padding(int current_offset) const { -- return 0; --} -- - // is_CAS(int opcode, bool maybe_volatile) - // - // return true if opcode is one of the possible CompareAndSwapX - -From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:03:47 +0800 -Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub - should save vector registers - ---- - src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - - 2 files changed, 46 deletions(-) - delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp - -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp -deleted file mode 100644 -index 26c1edc36ff..00000000000 ---- 
a/src/hotspot/cpu/riscv/registerMap_riscv.cpp -+++ /dev/null -@@ -1,45 +0,0 @@ --/* -- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. -- * -- */ -- --#include "precompiled.hpp" --#include "runtime/registerMap.hpp" --#include "vmreg_riscv.inline.hpp" -- --address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { -- if (base_reg->is_VectorRegister()) { -- assert(base_reg->is_concrete(), "must pass base reg"); -- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / -- VectorRegisterImpl::max_slots_per_register; -- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; -- address base_location = location(base_reg); -- if (base_location != NULL) { -- return base_location + offset_in_bytes; -- } else { -- return NULL; -- } -- } else { -- return location(base_reg->next(slot_idx)); -- } --} -diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -index f34349811a9..fef8ca9b64e 100644 ---- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp -+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -33,7 +33,6 @@ - // This is the hook for finding a register in an "well-known" location, - // such as a register block of a predetermined format. - address pd_location(VMReg reg) const { return NULL; } -- address pd_location(VMReg base_reg, int slot_idx) const; - - // no PD state to clear or copy: - void pd_clear() {} - -From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:05:37 +0800 -Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is - unused - ---- - src/hotspot/cpu/riscv/riscv.ad | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index fc6823daf8b..c21508b6e7c 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -2326,6 +2326,9 @@ encode %{ - // SP meets the minimum alignment. - - frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. 
++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ SHAOptionsBase.TestCase { + - // These three registers define part of the calling convention - // between compiled code and the interpreter. - - -From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:15:45 +0800 -Subject: [PATCH 115/140] Revert CacheWB*Node matching rules - ---- - src/hotspot/cpu/riscv/riscv.ad | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index c21508b6e7c..e410bd06aa6 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { - } - - switch (opcode) { -- case Op_CacheWB: // fall through -- case Op_CacheWBPreSync: // fall through -- case Op_CacheWBPostSync: -- if (!VM_Version::supports_data_cache_line_flush()) { -- return false; -- } -- break; -- - case Op_PopCountI: - case Op_PopCountL: - return UsePopCountInstruction; - -From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:21:10 +0800 -Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in - JavaCallArguments - ---- - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -index bc4e5758256..df3c0267eea 100644 ---- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { - } - - // Oops are stored in native format in one JavaCallArgument slot at *to. -- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } -- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } - - // Floats are stored in native format in one JavaCallArgument slot at *to. - static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } - -From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:33:01 +0800 -Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of - collectedHeap.hpp and heapInspection.hpp - ---- - src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -index 40ec584b994..d4fcbdcbbde 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.cpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) - oop* obj_p = (oop*)tos_addr; - obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; - } -- assert(Universe::is_in_heap_or_null(obj), "sanity check"); -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); - *oop_result = obj; - break; - } - -From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:35:21 +0800 -Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 - in frame::saved_oop_result - ---- - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -index 5ac1bf57f57..abd5bda7e49 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { - - - // Compiled frames --PRAGMA_DIAG_PUSH --PRAGMA_NONNULL_IGNORED - inline oop frame::saved_oop_result(RegisterMap* map) const { - oop* result_adr = (oop *)map->location(x10->as_VMReg()); - guarantee(result_adr != NULL, "bad register save location"); -@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { - guarantee(result_adr != NULL, "bad register save location"); - *result_adr = obj; - } --PRAGMA_DIAG_POP - - #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP - -From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:40:43 +0800 -Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as - MULTI_COPY_ATOMIC - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index ffd420da024..606f0fa0da3 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; - // 32-bit integer argument values are extended to 64 bits. - const bool CCallingConventionRequiresIntsAsLongs = false; - --// RISCV has adopted a multicopy atomic model closely following --// that of ARMv8. --#define CPU_MULTI_COPY_ATOMIC -- - // To be safe, we deoptimize when we come across an access that needs - // patching. This is similar to what is done on aarch64. 
- #define DEOPTIMIZE_WHEN_PATCHING - -From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:42:20 +0800 -Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed - oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed - class pointers independent (x86_64, PPC, S390) - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index 606f0fa0da3..acdf75d324e 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - --#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false -- - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:50:49 +0800 -Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b78f258a764..a838a377829 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -32,6 +32,8 @@ - // Sets the default values for platform dependent flags used by the runtime system. - // (see globals.hpp) - -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ final private boolean checkUseSHA; + - define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks - define_pd_global(bool, TrapBasedNullChecks, false); - define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast - -From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 21:52:44 +0800 -Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code - ---- - src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -index acdf75d324e..d6ce8da07b8 100644 ---- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; - - #define SUPPORT_RESERVED_STACK_AREA - -+#define THREAD_LOCAL_POLL ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } + - #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP - -From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:00:52 +0800 -Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on - absolute call site counts - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index a838a377829..b4f71c45ec1 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -41,6 +41,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs - 
define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. - define_pd_global(intx, CodeEntryAlignment, 64); - define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); - - #define DEFAULT_STACK_YELLOW_PAGES (2) - #define DEFAULT_STACK_RED_PAGES (1) - -From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:02:13 +0800 -Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b4f71c45ec1..b7d85373c4a 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); - // Clear short arrays bigger than one word in an arch-specific way - define_pd_global(intx, InitArrayShortSize, BytesPerLong); - -+define_pd_global(bool, ThreadLocalHandshakes, true); ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( ++ optionName)))); + - define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - -From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sun, 30 Apr 2023 22:04:32 +0800 -Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation - JDK-8236625: Remove writeable macro from JVM flags declaration - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index b7d85373c4a..0becd9efd35 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); - - #define ARCH_FLAGS(develop, \ - product, \ -+ diagnostic, \ -+ experimental, \ - notproduct, \ - range, \ -- constraint) \ -+ constraint, \ -+ writeable) \ - \ - product(bool, NearCpool, true, \ - "constant pool is close to instructions") \ - -From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Wed, 10 May 2023 09:44:12 +0800 -Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && - JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, - always enabled - ---- - src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp -index 0becd9efd35..e820898d87f 100644 ---- a/src/hotspot/cpu/riscv/globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); - define_pd_global(bool, RewriteBytecodes, true); - define_pd_global(bool, RewriteFrequentPairs, true); - -+define_pd_global(bool, UseMembar, true); ++ this.checkUseSHA = checkUseSHA; ++ } + - define_pd_global(bool, PreserveFramePointer, false); - - // GC Ergo Flags -diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -index 50ee7edb708..f13e4269b77 100644 
---- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -139,6 +139,8 @@ void VM_Version::initialize() { - #endif // COMPILER2 - - UNSUPPORTED_OPTION(CriticalJNINatives); ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. ++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ FLAG_SET_DEFAULT(UseMembar, true); - } - - #ifdef COMPILER2 - -From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Mon, 1 May 2023 16:04:15 +0800 -Subject: [PATCH 127/140] Misc adaptations to jdk11u - ---- - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- - .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- - 2 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -index 25e00bea901..9316d4be02e 100644 ---- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); - define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); - define_pd_global(uintx, MetaspaceSize, 12*M ); - define_pd_global(bool, NeverActAsServerClassMachine, true ); --define_pd_global(uint64_t, MaxRAM, 1ULL*G); -+define_pd_global(uint64_t, MaxRAM, 1ULL*G); - define_pd_global(bool, CICompileOSR, true ); - #endif // !TIERED - define_pd_global(bool, UseTypeProfile, false); -diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -index 4623dbfad42..60260854db6 100644 ---- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { - - uint64_t auxv = getauxval(AT_HWCAP); - -- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); -- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); -- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); -- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); -- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); -- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); -- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); -- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); -+ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); - _features = auxv & ( - HWCAP_ISA_I | - HWCAP_ISA_M | - -From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 -From: "yunyao.zxl" -Date: Sat, 20 May 2023 17:51:52 +0800 -Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may - crash - ---- - 
.../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -index bc847388f68..e191cbcee2a 100644 ---- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, - __ j(done); - - __ bind(runtime); -- // save the live input values -- RegSet saved = RegSet::of(pre_val); -- if (tosca_live) { saved += RegSet::of(x10); } -- if (obj != noreg) { saved += RegSet::of(obj); } -- -- __ push_reg(saved, sp); - -+ __ push_call_clobbered_registers(); - if (expand_call) { - assert(pre_val != c_rarg1, "smashed arg"); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); - } -- -- __ pop_reg(saved, sp); -+ __ pop_call_clobbered_registers(); - - __ bind(done); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ } ++ ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ ++ // Verify that option is disabled when +UseSHA was passed to JVM. 
++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a6..7be8af6d03 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e432..797927b42b 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf3746..be8f7d586c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f1..d96d5e29c0 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b516..b09c873d05 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528b..fe40ed6f98 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224e..5163191049 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c98800..d999ae423c 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95aed..65912a5c7f 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb2..fffdc2f756 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d2..2c866f26f0 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ - -From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 22 Aug 2023 16:17:31 +0800 -Subject: [PATCH 129/140] Build with gcc 13 - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + - src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 37ccf132986..fd18bb77058 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -49,6 +49,7 @@ - #include "runtime/thread.hpp" - #ifdef COMPILER2 - #include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" - #include "opto/node.hpp" - #include "opto/output.hpp" - #endif -diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -index 31d9254d8ad..ccceed643ed 100644 ---- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -24,6 +24,7 @@ + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ + 
package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f9311..1da369fde2 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ - #include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" - #include "runtime/frame.inline.hpp" - #include "runtime/thread.inline.hpp" - - -From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 5 Sep 2023 15:37:43 +0800 -Subject: [PATCH 130/140] Fix copyright information - ---- - make/autoconf/build-aux/config.guess | 2 +- - .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess -index 15111d827ab..a88a9adec3f 100644 ---- a/make/autoconf/build-aux/config.guess -+++ b/make/autoconf/build-aux/config.guess -@@ -1,6 +1,6 @@ - #!/bin/sh - # --# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. - # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. - # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - # -diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -index f0b7aed5ceb..54640b245f8 100644 ---- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -+++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java -@@ -1,4 +1,5 @@ + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb5..7afe3560f3 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ /* -+ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it - -From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 -From: Chris Plummer -Date: Thu, 2 Jul 2020 13:13:10 -0700 -Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with - sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp - -Reviewed-by: sspitsyn, ysuenaga, dtitov ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- - .../linux/native/libsaproc/ps_proc.c | 3 ++- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- - .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- - 9 files changed, 61 insertions(+), 27 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 45a927fb5ee..6f1887f8113 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); -+ // This is not considered fatal and does happen on occassion, usually with an -+ // ESRCH error. The root cause is not fully understood, but by ignoring this error -+ // and returning NULL, stacking walking code will get null registers and fallback -+ // to using the "last java frame" if setup. -+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -+ fflush(stdout); -+ return NULL; - } +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859e..691c3f6684a 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp(%d) errno(%d) \"%s\"\n", pid, -+ errno, strerror(errno)); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index 18b8b4282fe..e46370a1f18 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadRegisterSet0 called\n"); -+ print_debug("getThreadIntegerRegisterSet0 called\n"); + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); - if (result != KERN_SUCCESS) { -- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); -+ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -+ // failure to get thread registers, but if it were to fail the response should -+ // be the same. By ignoring this error and returning NULL, stacking walking code -+ // will get null registers and fallback to using the "last java frame" if setup. 
-+ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -+ result, tid); -+ fflush(stdout); - return NULL; - } + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java b/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java +index 5aea51a24f..da63e02555 100644 +--- a/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java ++++ b/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java +@@ -24,14 +24,41 @@ + package gc.stress; -@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + /* +- * @test TestStressG1Humongous ++ * @test + * @key gc stress + * @summary Stress G1 by humongous allocations in situation near OOM + * @requires vm.gc.G1 + * @requires !vm.flightRecorder + * @library /test/lib + * @modules java.base/jdk.internal.misc +- * @run driver/timeout=1300 gc.stress.TestStressG1Humongous ++ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 4 3 1.1 120 ++ */ ++ ++/* ++ * @test ++ * @requires vm.gc.G1 ++ * @requires !vm.flightRecorder ++ * @library /test/lib ++ * @modules java.base/jdk.internal.misc ++ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 16 5 2.1 120 ++ */ ++ ++/* ++ * @test ++ * @requires vm.gc.G1 ++ * @requires !vm.flightRecorder ++ * @library /test/lib ++ * @modules java.base/jdk.internal.misc ++ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 32 4 0.6 120 ++ */ ++ ++/* ++ * @test ++ * @requires vm.gc.G1 ++ * @requires !vm.flightRecorder ++ * @library /test/lib ++ * @modules java.base/jdk.internal.misc ++ * @run driver/timeout=900 gc.stress.TestStressG1Humongous 1 7 0.6 600 */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); -- -+ - return (jint) usable_tid; - } + import java.util.ArrayList; +@@ -48,17 +75,19 @@ import jdk.test.lib.process.OutputAnalyzer; + public class TestStressG1Humongous{ + + public static void main(String[] args) throws Exception { ++ if (args.length != 4) { ++ throw new IllegalArgumentException("Test expects 4 arguments"); ++ } ++ + // Limit heap size on 32-bit platforms + int heapSize = Platform.is32bit() ? 
512 : 1024; +- // Heap size, region size, threads, humongous size, timeout +- run(heapSize, 4, 3, 1.1, 120); +- run(heapSize, 16, 5, 2.1, 120); +- run(heapSize, 32, 4, 0.6, 120); +- run(heapSize, 1, 7, 0.6, 600); +- } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index 655b450c3fc..d0557a7d254 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index 0d637f30f14..c52d3a51d54 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +- private static void run(int heapSize, int regionSize, int threads, double humongousSize, int timeout) +- throws Exception { ++ // Region size, threads, humongous size, and timeout passed as @run arguments ++ int regionSize = Integer.parseInt(args[0]); ++ int threads = Integer.parseInt(args[1]); ++ double humongousSize = Double.parseDouble(args[2]); ++ int timeout = Integer.parseInt(args[3]); ++ + ArrayList options = new ArrayList<>(); + Collections.addAll(options, Utils.getTestJavaOpts()); + Collections.addAll(options, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a3..d4d43b01ae 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,8 +67,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. 
-+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index cb6712b58ee..6a0648f508a 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException); -+ throw new DebuggerException(lastException.getMessage(), lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 52307b9cdcf..3fe795d34bc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abcd..eab19273ad 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -73,8 +73,12 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -+ // null means we failed to get the register set for some reason. The caller -+ // is responsible for dealing with the set of null registers in that case. 
-+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; +@@ -239,7 +239,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index ec5aea35e8c..377650a0a1c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 126a43a900..feb4de5388 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html b/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html +deleted file mode 100644 +index 7049e82703..0000000000 +--- a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html ++++ /dev/null +@@ -1,43 +0,0 @@ +- +- +- +- +- +- ManualYesNoTest +- +- +- +-

ManualYesNoTest
Bug ID:
See the dialog box (usually in upper left corner) for instructions
-+ if (data != null) { -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); -+ } - } - return context; - } -@@ -86,6 +90,7 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -+ gotID = true; - } - - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index 314cf69c957..e3b218b4dae 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,6 +45,7 @@ +@@ -22,29 +22,29 @@ + */ - #include - #include -+#include + /* +- test ++ @test + @bug 6243382 8006070 + @summary Dragging of mouse outside of a List and Choice area don't work properly on XAWT +- @author Dmitry.Cherepanov@SUN.COM area=awt.list +- @run applet/manual=yesno MouseDraggedOutCauseScrollingTest.html ++ @requires (os.family == "linux") ++ @library /java/awt/regtesthelpers ++ @run main/manual MouseDraggedOutCauseScrollingTest + */ + +-import java.applet.Applet; +-import java.awt.*; ++import java.awt.Choice; ++import java.awt.Frame; ++import java.awt.GridLayout; ++import java.awt.List; ++import java.awt.Toolkit; + +-public class MouseDraggedOutCauseScrollingTest extends Applet +-{ +- Choice choice; +- List singleList; +- List multipleList; ++public class MouseDraggedOutCauseScrollingTest { - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); +- public void init() +- { +- this.setLayout (new GridLayout (1, 3)); ++ static Frame createUI() { ++ Frame frame = new Frame("MouseDraggedOutCausesScrollingTest"); ++ frame.setLayout(new GridLayout(1, 3)); + +- choice = new Choice(); +- singleList = new List(3, false); +- multipleList = new List(3, true); ++ Choice choice = new Choice(); ++ List singleList = new List(3, false); ++ List multipleList = new List(3, true); + + choice.add("Choice"); + for (int i = 1; i < 100; i++){ +@@ -59,188 +59,66 @@ public class MouseDraggedOutCauseScrollingTest extends Applet + for (int i = 1; i < 100; i++) + multipleList.add(""+i); + +- this.add(choice); +- this.add(singleList); +- this.add(multipleList); ++ frame.add(choice); ++ frame.add(singleList); ++ frame.add(multipleList); ++ frame.setSize(400, 100); ++ return frame; ++ } + ++ public static void main(String[] args) throws Exception { + String toolkitName = Toolkit.getDefaultToolkit().getClass().getName(); ++ + if (!toolkitName.equals("sun.awt.X11.XToolkit")) { +- String[] instructions = +- { +- "This test is not applicable to the current platform. Press PASS" +- }; +- Sysout.createDialogWithInstructions( instructions ); +- } else { +- String[] instructions = +- { +- "0) Please note, that this is only Motif/XAWT test. At first, make the applet active", +- "1.1) Click on the choice", +- "1.2) Press the left button of the mouse and keep on any item of the choice, for example 5", +- "1.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same", +- "1.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling UP of the list and selected item changes on the upper. 
If not, the test failed", +- "1.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling DOWN of the list and selected item changes on the lower. If not, the test failed", +- "-----------------------------------", +- "2.1) Click on the single list", +- "2.2) Press the left button of the mouse and keep on any item of the list, for example 5", +- "2.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same", +- "2.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling UP of the list and selected item changes on the upper. If not, the test failed", +- "2.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling DOWN of the list and selected item changes on the lower. If not, the test failed", +- "-----------------------------------", +- "3.1) Click on the multiple list", +- "3.2) Press the left button of the mouse and keep on any item of the list, for example 5", +- "3.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same", +- "3.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling of the list NO OCCURED and selected item NO CHANGES on the upper. If not, the test failed", +- "3.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling of the list NO OCCURED and selected item NO CHANGES on the lower. If not, the test failed", +- "4) Test passed." +- }; +- Sysout.createDialogWithInstructions( instructions ); ++ System.out.println(INAPPLICABLE); ++ return; + } - ULONG id = 0; -- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -- "Windbg Error: GetThreadIdBySystemId failed!", 0); +- }//End init() - -+ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -+ if (hr != S_OK) { -+ // This is not considered fatal and does happen on occassion, usually with an -+ // 0x80004002 "No such interface supported". The root cause is not fully understood, -+ // but by ignoring this error and returning NULL, stacking walking code will get -+ // null registers and fallback to using the "last java frame" if setup. 
-+ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -+ hr, sysId); -+ return -1; -+ } - return (jlong) id; - } - - -From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Wed, 30 Mar 2022 09:04:55 +0000 -Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit - fixed-length instruction sequence - -Reviewed-by: fyang, shade ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index fd18bb77058..b72a553da2f 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t - void MacroAssembler::stop(const char* msg) { - address ip = pc(); - pusha(); -- li(c_rarg0, (uintptr_t)(address)msg); -- li(c_rarg1, (uintptr_t)(address)ip); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of msg and ip so that the size of mach nodes for scratch -+ // emit and normal emit matches. -+ mv(c_rarg0, (address)msg); -+ mv(c_rarg1, (address)ip); - mv(c_rarg2, sp); - mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); - jalr(c_rarg3); - -From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Sun, 24 Apr 2022 02:17:03 +0000 -Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for - MacroAssembler::verify_oops* - -Reviewed-by: shade, fyang ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index b72a553da2f..9f80f7e2650 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); - - mv(c_rarg0, reg); // c_rarg0 : x10 -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. -+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; -@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - ld(x10, addr); - } - -- li(t0, (uintptr_t)(address)b); -+ // The length of the instruction sequence emitted should be independent -+ // of the values of the local char buffer address so that the size of mach -+ // nodes for scratch emit and normal emit matches. 
-+ mv(t0, (address)b); - - // call indirectly to solve generation ordering problem - int32_t offset = 0; - -From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Mon, 30 May 2022 07:45:50 +0000 -Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of - MacroAssembler::movptr - -Reviewed-by: fjiang, yadongwang, fyang ---- - src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- - 4 files changed, 19 insertions(+), 18 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp -index f15ef5304c5..a5f688cda1f 100644 ---- a/src/hotspot/cpu/riscv/assembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - } - #endif - assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -- "48-bit overflow in address constant"); -- // Load upper 32 bits -- int32_t imm = imm64 >> 16; -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; - int64_t upper = imm, lower = imm; - lower = (lower << 52) >> 52; - upper -= lower; -@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { - lui(Rd, upper); - addi(Rd, Rd, lower); - -- // Load the rest 16 bits. -+ // Load the rest 17 bits. - slli(Rd, Rd, 11); -- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); -- slli(Rd, Rd, 5); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); - - // This offset will be used by following jalr/ld. -- offset = imm64 & 0x1f; -+ offset = imm64 & 0x3f; - } - - void Assembler::movptr(Register Rd, uintptr_t imm64) { -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index 9f80f7e2650..f592d7585da 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { - - static int patch_addr_in_movptr(address branch, address target) { - const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -- int32_t lower = ((intptr_t)target << 36) >> 36; -- int64_t upper = ((intptr_t)target - lower) >> 28; -- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] -- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] -- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] -- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] - return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; - } - -@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { - - static address get_target_of_movptr(address insn_addr) { - assert_cond(insn_addr != NULL); -- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. -- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. - return (address) target_address; - } -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 45ffc663963..792c1fc2103 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { - - // Return true if an address is within the 48-bit RISCV64 address space. - bool is_valid_riscv64_address(address addr) { -- return ((uintptr_t)addr >> 48) == 0; -+ // sv48: must have bits 63–48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; - } - - void ld_constant(Register dest, const Address &const_addr) { -diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -index bfe84fa4e30..27011ad1283 100644 ---- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { - is_addi_at(instr + instruction_size) && // Addi - is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 - is_addi_at(instr + instruction_size * 3) && // Addi -- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 - (is_addi_at(instr + instruction_size * 5) || - is_jalr_at(instr + instruction_size * 5) || - is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load - -From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Fri, 22 Sep 2023 16:57:56 +0800 -Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() - during reshaping - ---- - src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ - src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -index f592d7585da..f851cc1e413 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { - movptr(Rd, dest.target()); - } - -+void MacroAssembler::mv(Register Rd, address addr) { -+ // Here in case of use with relocation, use fix length instruction -+ // movptr instead of li -+ movptr(Rd, addr); -+} -+ - void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { - if 
(src.is_register()) { - mv(Rd, src.as_register()); -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -index 792c1fc2103..65f91532661 100644 ---- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { - } - - // mv -- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } +- public void start () +- { +- setSize (400,100); +- setVisible(true); +- validate(); +- +- }// start() +- +-}// class ManualYesNoTest +- +-/**************************************************** +- Standard Test Machinery +- DO NOT modify anything below -- it's a standard +- chunk of code whose purpose is to make user +- interaction uniform, and thereby make it simpler +- to read and understand someone else's test. +- ****************************************************/ +- +-/** +- This is part of the standard test machinery. +- It creates a dialog (with the instructions), and is the interface +- for sending text messages to the user. +- To print the instructions, send an array of strings to Sysout.createDialog +- WithInstructions method. Put one line of instructions per array entry. +- To display a message for the tester to see, simply call Sysout.println +- with the string to be displayed. +- This mimics System.out.println but works within the test harness as well +- as standalone. +- */ - - inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } - inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { - inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } - - void mv(Register Rd, Address dest); -+ void mv(Register Rd, address dest); - void mv(Register Rd, RegisterOrConstant src); - - // logic - -From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 2 Sep 2022 07:01:02 +0000 -Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore - callee-saved FloatRegisters in StubGenerator::generate_call_stub - -Reviewed-by: yadongwang, fjiang, shade, vkempik ---- - src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- - src/hotspot/cpu/riscv/riscv.ad | 18 ++--- - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- - 4 files changed, 80 insertions(+), 16 deletions(-) - -diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp -index 3b88f6d5a1a..18e021dcb94 100644 ---- a/src/hotspot/cpu/riscv/frame_riscv.hpp -+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -131,7 +131,7 @@ - // Entry frames - // n.b. 
these values are determined by the layout defined in - // stubGenerator for the Java call stub -- entry_frame_after_call_words = 22, -+ entry_frame_after_call_words = 34, - entry_frame_call_wrapper_offset = -10, - - // we don't need a save area -diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad -index e410bd06aa6..69696b272a5 100644 ---- a/src/hotspot/cpu/riscv/riscv.ad -+++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); -@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST); -- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ - effect(USE lbl); - - ins_cost(BRANCH_COST); -- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} - - ins_encode %{ - __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); -@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), -@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) - effect(USE lbl); - - ins_cost(XFER_COST + BRANCH_COST * 2); -- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} - - ins_encode %{ - __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -index 74c38c3d044..9970229c5c5 100644 ---- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { - // we don't need to save x6-x7 and x28-x31 which both C and Java treat as - // volatile - // -- // we save x18-x27 which Java uses as temporary registers and C -- // expects to be callee-save -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save - // - // so the stub frame looks like this when we enter Java code - // - // [ return_from_Java ] <--- sp - // [ argument word n ] - // ... -- // -22 [ argument word 1 ] -- // -21 [ saved x27 ] <--- sp_after_call -+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] - // -20 [ saved x26 ] - // -19 [ saved x25 ] - // -18 [ saved x24 ] -@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { +-class Sysout +-{ +- private static TestDialog dialog; +- +- public static void createDialogWithInstructions( String[] instructions ) +- { +- dialog = new TestDialog( new Frame(), "Instructions" ); +- dialog.printInstructions( instructions ); +- dialog.setVisible(true); +- println( "Any messages for the tester will display here." ); +- } +- +- public static void createDialog( ) +- { +- dialog = new TestDialog( new Frame(), "Instructions" ); +- String[] defInstr = { "Instructions will appear here. ", "" } ; +- dialog.printInstructions( defInstr ); +- dialog.setVisible(true); +- println( "Any messages for the tester will display here." ); +- } +- +- +- public static void printInstructions( String[] instructions ) +- { +- dialog.printInstructions( instructions ); +- } +- +- +- public static void println( String messageIn ) +- { +- dialog.displayMessage( messageIn ); +- } +- +-}// Sysout class +- +-/** +- This is part of the standard test machinery. It provides a place for the +- test instructions to be displayed, and a place for interactive messages +- to the user to be displayed. +- To have the test instructions displayed, see Sysout. +- To have a message to the user be displayed, see Sysout. +- Do not call anything in this dialog directly. 
+- */
+-class TestDialog extends Dialog
+-{
+-
+- TextArea instructionsText;
+- TextArea messageText;
+- int maxStringLength = 80;
+-
+- //DO NOT call this directly, go through Sysout
+- public TestDialog( Frame frame, String name )
+- {
+- super( frame, name );
+- int scrollBoth = TextArea.SCROLLBARS_BOTH;
+- instructionsText = new TextArea( "", 15, maxStringLength, scrollBoth );
+- add( "North", instructionsText );
+-
+- messageText = new TextArea( "", 5, maxStringLength, scrollBoth );
+- add("Center", messageText);
+-
+- pack();
+-
+- setVisible(true);
+- }// TestDialog()
+-
+- //DO NOT call this directly, go through Sysout
+- public void printInstructions( String[] instructions )
+- {
+- //Clear out any current instructions
+- instructionsText.setText( "" );
+-
+- //Go down array of instruction strings
+-
+- String printStr, remainingStr;
+- for( int i=0; i < instructions.length; i++ )
+- {
+- //chop up each into pieces maxSringLength long
+- remainingStr = instructions[ i ];
+- while( remainingStr.length() > 0 )
+- {
+- //if longer than max then chop off first max chars to print
+- if( remainingStr.length() >= maxStringLength )
+- {
+- //Try to chop on a word boundary
+- int posOfSpace = remainingStr.
+- lastIndexOf( ' ', maxStringLength - 1 );
+-
+- if( posOfSpace <= 0 ) posOfSpace = maxStringLength - 1;
+-
+- printStr = remainingStr.substring( 0, posOfSpace + 1 );
+- remainingStr = remainingStr.substring( posOfSpace + 1 );
+- }
+- //else just print
+- else
+- {
+- printStr = remainingStr;
+- remainingStr = "";
+- }
+-
+- instructionsText.append( printStr + "\n" );
+-
+- }// while
+-
+- }// for
+-
+- }//printInstructions()
+-
+- //DO NOT call this directly, go through Sysout
+- public void displayMessage( String messageIn )
+- {
+- messageText.append( messageIn + "\n" );
+- System.out.println(messageIn);
++ PassFailJFrame
++ .builder()
++ .instructions(INSTRUCTIONS)
++ .rows(40)
++ .columns(70)
++ .testUI(MouseDraggedOutCauseScrollingTest::createUI)
++ .build()
++ .awaitAndCheck();
+ }
-}// TestDialog class
++ static final String INAPPLICABLE = "The test is not applicable to the current platform. Test PASSES.";
++ static final String INSTRUCTIONS = "0) Please note that this is an XAWT/Linux only test. First, make sure the test window is active.\n" +
++ "-----------------------------------\n" +
++ "1.1) Click on the Choice.\n" +
++ "1.2) Press and hold down the left button of the mouse to select (eg) item 5 in the choice.\n" +
++ "1.3) Drag the mouse vertically out of the area of the open list,\n" +
++ " keeping the X coordinate of the mouse position about the same.\n" +
++ "1.4) Check that when the Y coordinate of the mouse position is higher than the upper bound of the list\n" +
++ " then the list continues to scroll UP and the selected item changes at the top until you reach the topmost item.\n" +
++ " If not, the test failed. Press FAIL.\n" +
++ "1.5) Check that when the Y coordinate of the mouse position is lower than the lower bound of the list\n" +
++ " then the list continues to scroll DOWN and the selected item changes at the bottom until you reach the bottommost item.\n" +
++ " If not, the test failed. 
Press FAIL.\n" + ++ "-----------------------------------\n" + ++ "2.1) Click on the Single List.\n" + ++ "2.2) Press and hold down the left button of the mouse to select (eg) item 5 in the list.\n" + ++ "2.3) Drag the mouse vertically out of the area of the open list,\n" + ++ " keeping the X coordinate of the mouse position about the same.\n" + ++ "2.4) Check that when the Y coordinate of the mouse position is higher than the upper bound of the list\n" + ++ " then the list continues to scrolls UP and the selected item changes at the top until you reach the topmost item.\n" + ++ " If not, the test failed. Press FAIL.\n" + ++ "2.5) Check that when the Y coordinate of the mouse position is lower than the lower bound of the list\n" + ++ " then the list continues to scroll DOWN and the selected item changes at the bottom until you reach the bottommost item.\n" + ++ " If not, the test failed. Press FAIL.\n" + ++ "-----------------------------------\n" + ++ "3.1) Click on the Multiple List.\n" + ++ "3.2) Press and hold down the left button of the mouse to select (eg) item 5 in the list.\n" + ++ "3.3) Drag the mouse vertically out of the area of the open list,\n" + ++ " keeping the X coordinate of the mouse position about the same.\n" + ++ "3.4) Check that when the Y coordinate of the mouse is higher than the upper bound of the list\n" + ++ " that scrolling of the list DOES NOT OCCUR and the selected item IS UNCHANGED at the top.\n" + ++ " If not, the test failed. Press FAIL.\n" + ++ "3.5) Check that when the Y coordinate of the mouse is below the lower bound of the list\n" + ++ " that scrolling of the list DOES NOT OCCUR and the selected item IS UNCHANGED at the bottom.\n" + ++ " If not, the test failed. Press FAIL.\n" + ++ "-----------------------------------\n" + ++ "4) The test has now passed. Press PASS."; ++} +diff --git a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html +index a562b886ab..e69de29bb2 100644 +--- a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html ++++ b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html +@@ -1,44 +0,0 @@ +- +- +- +- +- PrintDialogsTest +- +- +- +- +-Please select dialog modality type and parent; also select +-the print auxiliary dialog to be displayed (Page Setup or Print dialog). +-Then click "Start test" button. +- +-When the windows will appear check if modal blocking for Dialog works as expected. +-Then push "Open" button on the Dialog to show the auxiliary dialog and check +-if it blocks the rest of the application. Then close it and check correctness +-of modal blocking behavior for the Dialog again. To close all the test +-windows please push "Finish" button. +- +-To finish the overall test push "Pass" or "Fail" button depending on result. +- +- +- +diff --git a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java +index 989c48295b..8a07d284a9 100644 +--- a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java ++++ b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2007, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,21 +25,75 @@ + /* + * @test + * @bug 8055836 8057694 8055752 +- * @summary Check if Print and Page Setup dialogs lock other windows; ++ * @summary Check if Print and Page Setup dialogs block other windows; + * check also correctness of modal behavior for other dialogs. +- * +- * @run applet/manual=yesno PrintDialogsTest.html ++ * @library /java/awt/regtesthelpers ++ * @run main/manual PrintDialogsTest + */ - x27_off = -21, - x26_off = -20, -@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { - const Address thread (fp, thread_off * wordSize); +-import java.applet.Applet; +-import java.awt.*; ++import java.awt.BorderLayout; ++import java.awt.Button; ++import java.awt.Checkbox; ++import java.awt.CheckboxGroup; ++import java.awt.Dialog; ++import java.awt.Frame; ++import java.awt.EventQueue; ++import java.awt.GridLayout; ++import java.awt.Label; ++import java.awt.Panel; -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); -+ - const Address x27_save (fp, x27_off * wordSize); - const Address x26_save (fp, x26_off * wordSize); - const Address x25_save (fp, x25_off * wordSize); -@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { - __ sd(x26, x26_save); - __ sd(x27, x27_save); + import java.awt.event.ActionEvent; + import java.awt.event.ActionListener; -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); -+ - // install Java thread in global register now we have saved - // whatever value it held - __ mv(xthread, c_rarg7); -@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { - #endif - // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ - __ ld(x27, x27_save); - __ ld(x26, x26_save); - __ ld(x25, x25_save); -diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -index 5d1187c2a27..c4338715f95 100644 ---- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp -+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { - FloatRegister freg = ::as_FloatRegister(0); - for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { - for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { -- regName[i++] = reg->name(); -+ regName[i++] = freg->name(); - } - freg = freg->successor(); - } - -From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 -From: Xiaolin Zheng -Date: Fri, 28 Oct 2022 11:56:21 +0000 
-Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix - LIRGenerator::do_LibmIntrinsic - -Reviewed-by: yadongwang, fyang ---- - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- - .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ - 2 files changed, 96 insertions(+), 5 deletions(-) - create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java - -diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -index f9242251491..c41819fc2ae 100644 ---- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { - LIRItem value(x->argument_at(0), this); - value.set_destroys_register(); -+ - LIR_Opr calc_result = rlock_result(x); - LIR_Opr result_reg = result_register_for(x->type()); +-public class PrintDialogsTest extends Applet implements ActionListener { ++public class PrintDialogsTest extends Panel implements ActionListener { + - CallingConvention* cc = NULL; -- BasicTypeList signature(1); -- signature.append(T_DOUBLE); -- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -- cc = frame_map()->c_calling_convention(&signature); -- value.load_item_force(cc->at(0)); -+ - if (x->id() == vmIntrinsics::_dpow) { - LIRItem value1(x->argument_at(1), this); -+ - value1.set_destroys_register(); -+ -+ BasicTypeList signature(2); -+ signature.append(T_DOUBLE); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - value1.load_item_force(cc->at(1)); -+ } else { -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); - } ++ static final String INSTRUCTIONS = ++ "This test is free format, which means there is no enforced or guided sequence." + "\n" + + - switch (x->id()) { - case vmIntrinsics::_dexp: - if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -new file mode 100644 -index 00000000000..5c711efddea ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ ++ "Please select each of " + "\n" + ++ "(a) The dialog parent type." + "\n" + ++ "(b) The dialog modality type" + "\n" + ++ "(c) The print dialog type (Print dialog or Page Setup dialog)" + "\n" + + -+/* -+ * @test -+ * @summary Test libm intrinsics -+ * @library /test/lib / -+ * -+ * @build jdk.test.whitebox.WhiteBox -+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -+ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -+ * compiler.floatingpoint.TestLibmIntrinsics -+ */ ++ "Once the choices have been made click the \"Start test\" button." + "\n" + + -+package compiler.floatingpoint; ++ "Three windows will appear" + "\n" + ++ "(1) A Frame or a Dialog - in the case you selected \"Dialog\" as the parent type" + "\n" + ++ "(2) a Window (ie an undecorated top-level)" + "\n" + ++ "(3) A dialog with two buttons \"Open\" and \"Finish\"" + "\n" + + -+import compiler.whitebox.CompilerWhiteBoxTest; -+import jdk.test.whitebox.WhiteBox; ++ "Now check as follows whether modal blocking works as expected." + "\n" + ++ "Windows (1) and (2) contain a button which you should be able to press" + "\n" + ++ "ONLY if you selected \"Non-modal\", or \"Modeless\" for modality type." + "\n" + ++ "In other cases window (3) will block input to (1) and (2)" + "\n" + + -+import java.lang.reflect.Method; ++ "Then push the \"Open\" button on the Dialog to show the printing dialog and check" + "\n" + ++ "if it blocks the rest of the application - ie all of windows (1), (2) and (3)" + "\n" + ++ "should ALWAYS be blocked when the print dialog is showing." + "\n" + ++ "Now cancel the printing dialog and check the correctness of modal blocking" + "\n" + ++ "behavior for the Dialog again." + "\n" + ++ "To close all the 3 test windows please push the \"Finish\" button." + "\n" + + -+public class TestLibmIntrinsics { ++ "Repeat all the above for different combinations, which should include" + "\n" + ++ "using all of the Dialog parent choices and all of the Dialog Modality types." + "\n" + + -+ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++ "If any behave incorrectly, note the combination of choices and press Fail." 
+ "\n" + + -+ private static final double pi = 3.1415926; ++ "If all behave correctly, press Pass."; + -+ private static final double expected = 2.5355263553695413; ++ public static void main(String[] args) throws Exception { + -+ static double m() { -+ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ PassFailJFrame.builder() ++ .instructions(INSTRUCTIONS) ++ .rows(35) ++ .columns(60) ++ .testUI(PrintDialogsTest::createUI) ++ .testTimeOut(10) ++ .build() ++ .awaitAndCheck(); + } -+ -+ static public void main(String[] args) throws NoSuchMethodException { -+ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); -+ -+ double interpreter_result = m(); -+ -+ // Compile with C1 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); -+ -+ double c1_result = m(); -+ -+ WHITE_BOX.deoptimizeMethod(test_method); -+ -+ // Compile it with C2 if possible -+ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); -+ -+ double c2_result = m(); -+ -+ if (interpreter_result != c1_result || -+ interpreter_result != c2_result || -+ c1_result != c2_result) { -+ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); -+ throw new RuntimeException("Test Failed"); -+ } + + private Button btnTest; + private Checkbox cbPage, cbPrint, +@@ -48,6 +102,14 @@ public class PrintDialogsTest extends Applet implements ActionListener { + + private CheckboxGroup groupDialog, groupParent, groupModType; + ++ private static Frame createUI() { ++ Frame frame = new Frame("Dialog Modality Testing"); ++ PrintDialogsTest test = new PrintDialogsTest(); ++ test.createGUI(); ++ frame.add(test); ++ frame.pack(); ++ return frame; + } -+} - -From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Mon, 9 Oct 2023 11:07:34 +0800 -Subject: [PATCH 138/140] Fix test error after port 8295926 - ---- - .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -index 5c711efddea..5a1b659bbe0 100644 ---- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java -@@ -27,8 +27,8 @@ - * @summary Test libm intrinsics - * @library /test/lib / - * -- * @build jdk.test.whitebox.WhiteBox -- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox -+ * @build sun.hotspot.WhiteBox -+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI - * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement - * compiler.floatingpoint.TestLibmIntrinsics -@@ -37,7 +37,7 @@ - package compiler.floatingpoint; - import compiler.whitebox.CompilerWhiteBoxTest; --import jdk.test.whitebox.WhiteBox; -+import sun.hotspot.WhiteBox; + public void actionPerformed(ActionEvent e) { - import java.lang.reflect.Method; +@@ -99,13 +161,13 @@ public class PrintDialogsTest extends Applet implements ActionListener { - -From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Tue, 17 Oct 2023 14:22:49 +0800 -Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails - with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a - lwp - ---- - .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ - .../linux/native/libsaproc/ps_proc.c | 3 +-- - .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ - .../debugger/bsd/BsdDebuggerLocal.java | 2 +- - .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- - .../debugger/linux/LinuxDebuggerLocal.java | 2 +- - .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- - .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- - .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- - 9 files changed, 27 insertions(+), 61 deletions(-) - -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 6f1887f8113..45a927fb5ee 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + setLayout(new BorderLayout()); - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (get_lwp_regs(ph, lwp_id, &gregs) != true) { -- // This is not considered fatal and does happen on occassion, usually with an -- // ESRCH error. The root cause is not fully understood, but by ignoring this error -- // and returning NULL, stacking walking code will get null registers and fallback -- // to using the "last java frame" if setup. -- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); -- fflush(stdout); -- return NULL; -+ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); - } +- setSize(350, 200); + Panel panel = new Panel(); +- panel.setLayout(new GridLayout(18, 1)); ++ panel.setLayout(new GridLayout(21, 1)); - #undef NPRGREG -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index 691c3f6684a..de5254d859e 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + btnTest = new Button("Start test"); + btnTest.addActionListener(this); + panel.add(btnTest); ++ panel.add(new Label(" ")); // spacing - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { -- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, -- errno, strerror(errno)); -+ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); - return false; - } - return true; -diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -index e46370a1f18..18b8b4282fe 100644 ---- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m -@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - JNIEnv *env, jobject this_obj, - jlong thread_id) - { -- print_debug("getThreadIntegerRegisterSet0 called\n"); -+ print_debug("getThreadRegisterSet0 called\n"); - struct ps_prochandle* ph = get_proc_handle(env, this_obj); - if (ph != NULL && ph->core != NULL) { -@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + panel.add(new Label("Dialog parent:")); +@@ -123,6 +185,7 @@ public class PrintDialogsTest extends Applet implements ActionListener { + panel.add(cbHiddFrm); + panel.add(cbDlg); + panel.add(cbFrm); ++ panel.add(new Label(" ")); // spacing - if (result != KERN_SUCCESS) { -- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a -- // failure to get thread registers, but if it were to fail the response should -- // be the same. By ignoring this error and returning NULL, stacking walking code -- // will get null registers and fallback to using the "last java frame" if setup. -- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", -- result, tid); -- fflush(stdout); -+ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); - return NULL; - } + panel.add(new Label("Dialog modality type:")); + groupModType = new CheckboxGroup(); +@@ -139,7 +202,7 @@ public class PrintDialogsTest extends Applet implements ActionListener { + panel.add(cbDocModal); + panel.add(cbTKModal); + panel.add(cbModeless); +- add(panel); ++ panel.add(new Label(" ")); // spacing -@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo - */ - JNIEXPORT jint JNICALL - Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( -- JNIEnv *env, jobject this_obj, jint tid) -+ JNIEnv *env, jobject this_obj, jint tid) - { - print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + panel.add(new Label("Print dialog type:")); + groupDialog = new CheckboxGroup(); +@@ -148,13 +211,6 @@ public class PrintDialogsTest extends Applet implements ActionListener { + panel.add(cbPage); + panel.add(cbPrint); - kern_return_t result; - thread_t foreign_tid, usable_tid; - mach_msg_type_name_t type; -- -+ - foreign_tid = tid; -- -+ - task_t gTask = getTask(env, this_obj); -- result = mach_port_extract_right(gTask, foreign_tid, -- MACH_MSG_TYPE_COPY_SEND, -+ result = mach_port_extract_right(gTask, foreign_tid, -+ MACH_MSG_TYPE_COPY_SEND, - &usable_tid, &type); - if (result != KERN_SUCCESS) - return -1; -- -+ - print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- validate(); +- setVisible(true); +- } - -+ - return (jint) usable_tid; +- public void start() { +- try { +- EventQueue.invokeAndWait(this::createGUI); +- } catch (Exception e) {} ++ add(panel); + } } +diff --git a/test/jdk/javax/naming/module/RunBasic.java b/test/jdk/javax/naming/module/RunBasic.java +index 512062de40..f9d259d620 100644 +--- 
a/test/jdk/javax/naming/module/RunBasic.java ++++ b/test/jdk/javax/naming/module/RunBasic.java +@@ -134,7 +134,15 @@ public class RunBasic { + opts.add("test/" + clsName); + opts.add("ldap://" + HOST_NAME + "/dc=ie,dc=oracle,dc=com"); + System.out.println("Running with the '" + desc + "' module..."); ++<<<<<<< HEAD ++ runJava("-Dtest.src=" + TEST_SRC, "-p", "mods", "-m", "test/" + clsName, ++ "ldap://" + HOST_NAME + "/dc=ie,dc=oracle,dc=com"); ++||||||| 82c330b464 ++ runJava("-Dtest.src=" + TEST_SRC, "-p", "mods", "-m", "test/" + clsName, ++ "ldap://localhost/dc=ie,dc=oracle,dc=com"); ++======= + runJava(opts.toArray(String[]::new)); ++>>>>>>> cee8535a9d3de8558b4b5028d68e397e508bef71 + } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -index d0557a7d254..655b450c3fc 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java -@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -index c52d3a51d54..0d637f30f14 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java + private static void runJava(String... opts) throws Throwable { +diff --git a/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java b/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java +index a275eda517..2cb092e60b 100644 +--- a/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java ++++ b/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -67,12 +67,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); - ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); +@@ -74,15 +74,17 @@ public class TestListenerLeak { + + public static void main(String[] args) throws Exception { + WhiteBox.setWriteAllObjectSamples(true); +- +- try (Recording r = new Recording()) { +- r.enable(EventNames.OldObjectSample).withStackTrace().with("cutoff", "infinity"); +- r.start(); +- listenerLeak(); +- r.stop(); +- List events = Events.fromRecording(r); +- if (OldObjects.countMatchingEvents(events, Stuff[].class, null, null, -1, "listenerLeak") == 0) { +- throw new Exception("Could not find leak with " + Stuff[].class); ++ while (true) { ++ try (Recording r = new Recording()) { ++ r.enable(EventNames.OldObjectSample).withStackTrace().with("cutoff", "infinity"); ++ r.start(); ++ listenerLeak(); ++ r.stop(); ++ List events = Events.fromRecording(r); ++ if (OldObjects.countMatchingEvents(events, Stuff[].class, null, null, -1, "listenerLeak") != 0) { ++ return; // Success ++ } ++ System.out.println("Could not find leak with " + Stuff[].class + ". Retrying."); + } } - return context; } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -index 6a0648f508a..cb6712b58ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java -@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException - } catch (InterruptedException x) {} - } - if (lastException != null) { -- throw new DebuggerException(lastException.getMessage(), lastException); -+ throw new DebuggerException(lastException); - } else { - return task; - } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -index 3fe795d34bc..52307b9cdcf 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..abeff80e5e 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -73,12 +73,8 @@ public String toString() { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); - ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); -- } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } - return context; } -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -index 377650a0a1c..ec5aea35e8c 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java + } +diff --git a/test/jdk/sun/util/calendar/zi/Month.java b/test/jdk/sun/util/calendar/zi/Month.java +index cb60b8d441..bab909f763 100644 +--- a/test/jdk/sun/util/calendar/zi/Month.java ++++ b/test/jdk/sun/util/calendar/zi/Month.java @@ -1,5 +1,5 @@ /* -- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -30,9 +30,9 @@ +@@ -21,11 +21,6 @@ + * questions. + */ + +-import java.util.ArrayList; +-import java.util.HashMap; +-import java.util.List; +-import java.util.Map; +- + /** + * Month enum handles month related manipulation. + * +@@ -47,15 +42,6 @@ enum Month { - class WindbgAMD64Thread implements ThreadProxy { - private WindbgDebugger debugger; -- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id -+ private long sysId; - private boolean gotID; -- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId -+ private long id; + private final String abbr; - // The address argument must be the address of the OSThread::_thread_id - WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { -@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { - public ThreadContext getContext() throws IllegalThreadStateException { - long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); - WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); -- // null means we failed to get the register set for some reason. The caller -- // is responsible for dealing with the set of null registers in that case. 
-- if (data != null) { -- for (int i = 0; i < data.length; i++) { -- context.setRegister(i, data[i]); +- private static final Map abbreviations +- = new HashMap(12); +- +- static { +- for (Month m : Month.values()) { +- abbreviations.put(m.abbr, m); - } -+ for (int i = 0; i < data.length; i++) { -+ context.setRegister(i, data[i]); +- } +- + private Month(String abbr) { + this.abbr = abbr; } - return context; - } -@@ -90,7 +86,6 @@ public String toString() { - private long getThreadID() { - if (!gotID) { - id = debugger.getThreadIdFromSysId(sysId); -- gotID = true; +@@ -70,11 +56,22 @@ enum Month { + * @return the Month value + */ + static Month parse(String name) { +- Month m = abbreviations.get(name); +- if (m != null) { +- return m; +- } +- return null; ++ int len = name.length(); ++ ++ if (name.regionMatches(true, 0, "January", 0, len)) return Month.JANUARY; ++ if (name.regionMatches(true, 0, "February", 0, len)) return Month.FEBRUARY; ++ if (name.regionMatches(true, 0, "March", 0, len)) return Month.MARCH; ++ if (name.regionMatches(true, 0, "April", 0, len)) return Month.APRIL; ++ if (name.regionMatches(true, 0, "May", 0, len)) return Month.MAY; ++ if (name.regionMatches(true, 0, "June", 0, len)) return Month.JUNE; ++ if (name.regionMatches(true, 0, "July", 0, len)) return Month.JULY; ++ if (name.regionMatches(true, 0, "August", 0, len)) return Month.AUGUST; ++ if (name.regionMatches(true, 0, "September", 0, len)) return Month.SEPTEMBER; ++ if (name.regionMatches(true, 0, "October", 0, len)) return Month.OCTOBER; ++ if (name.regionMatches(true, 0, "November", 0, len)) return Month.NOVEMBER; ++ if (name.regionMatches(true, 0, "December", 0, len)) return Month.DECEMBER; ++ ++ throw new IllegalArgumentException("Unknown month: " + name); } - return id; -diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -index e3b218b4dae..314cf69c957 100644 ---- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -+++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp -@@ -45,7 +45,6 @@ + /** +diff --git a/test/jdk/sun/util/calendar/zi/RuleDay.java b/test/jdk/sun/util/calendar/zi/RuleDay.java +index bc730944b4..9cd81c1e52 100644 +--- a/test/jdk/sun/util/calendar/zi/RuleDay.java ++++ b/test/jdk/sun/util/calendar/zi/RuleDay.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -21,11 +21,6 @@ + * questions. + */ - #include - #include --#include +-import java.util.ArrayList; +-import java.util.HashMap; +-import java.util.List; +-import java.util.Map; +- + /** + * RuleDay class represents the value of the "ON" field. 
The day of + * week values start from 1 following the {@link java.util.Calendar} +@@ -34,13 +29,6 @@ import java.util.Map; + * @since 1.4 + */ + class RuleDay { +- private static final Map abbreviations = new HashMap(7); +- static { +- for (DayOfWeek day : DayOfWeek.values()) { +- abbreviations.put(day.getAbbr(), day); +- } +- } +- + private String dayName = null; + private DayOfWeek dow; + private boolean lastOne = false; +@@ -166,13 +154,23 @@ class RuleDay { + return sign + toString(d); + } - #define DEBUG_NO_IMPLEMENTATION - #include -@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal - CHECK_EXCEPTION_(0); +- private static DayOfWeek getDOW(String abbr) { +- return abbreviations.get(abbr); ++ private static DayOfWeek getDOW(String name) { ++ int len = name.length(); ++ ++ if (name.regionMatches(true, 0, "Monday", 0, len)) return DayOfWeek.MONDAY; ++ if (name.regionMatches(true, 0, "Tuesday", 0, len)) return DayOfWeek.TUESDAY; ++ if (name.regionMatches(true, 0, "Wednesday", 0, len)) return DayOfWeek.WEDNESDAY; ++ if (name.regionMatches(true, 0, "Thursday", 0, len)) return DayOfWeek.THURSDAY; ++ if (name.regionMatches(true, 0, "Friday", 0, len)) return DayOfWeek.FRIDAY; ++ if (name.regionMatches(true, 0, "Saturday", 0, len)) return DayOfWeek.SATURDAY; ++ if (name.regionMatches(true, 0, "Sunday", 0, len)) return DayOfWeek.SUNDAY; ++ ++ throw new IllegalArgumentException("Unknown day-of-week: " + name); + } - ULONG id = 0; -- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); -- if (hr != S_OK) { -- // This is not considered fatal and does happen on occassion, usually with an -- // 0x80004002 "No such interface supported". The root cause is not fully understood, -- // but by ignoring this error and returning NULL, stacking walking code will get -- // null registers and fallback to using the "last java frame" if setup. -- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", -- hr, sysId); -- return -1; -- } -+ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), -+ "Windbg Error: GetThreadIdBySystemId failed!", 0); -+ - return (jlong) id; - } + /** + * Converts the specified day of week value to the day-of-week +- * name defined in {@link java.util.Calenda}. ++ * name defined in {@link java.util.Calendar}. + * @param dow 1-based day of week value + * @return the Calendar day of week name with "Calendar." prefix. + * @throws IllegalArgumentException if the specified dow value is out of range. 
+diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index 6269373c2b..e1511772e7 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -205,6 +205,10 @@ public class Platform { + return isArch("arm.*"); + } - -From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 -From: Kuai Wei -Date: Wed, 31 Jan 2024 17:26:31 +0800 -Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp - ---- - .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- - 1 file changed, 42 deletions(-) - delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp - -diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -deleted file mode 100644 -index 1aa58f27871..00000000000 ---- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp -+++ /dev/null -@@ -1,42 +0,0 @@ --/* -- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -- * -- * This code is free software; you can redistribute it and/or modify it -- * under the terms of the GNU General Public License version 2 only, as -- * published by the Free Software Foundation. -- * -- * This code is distributed in the hope that it will be useful, but WITHOUT -- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- * version 2 for more details (a copy is included in the LICENSE file that -- * accompanied this code). -- * -- * You should have received a copy of the GNU General Public License version -- * 2 along with this work; if not, write to the Free Software Foundation, -- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -- * -- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -- * or visit www.oracle.com if you need additional information or have any -- * questions. 
-- * -- */ -- --#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP --#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP -- --#include -- --// --// Support for building on older Linux systems --// -- --#ifndef SYS_memfd_create --#define SYS_memfd_create 279 --#endif --#ifndef SYS_fallocate --#define SYS_fallocate 47 --#endif -- --#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } diff --git a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch index 8d4548aad36df00f937ee2babb039206bb059a35..fc0fb3465ddce315670454a0d7996372dcc0eaaa 100755 --- a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +++ b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch @@ -367,4 +367,4 @@ index 000000000..85b49171c --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ -+11.0.24.0.13 ++11.0.25.0.13 diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch index bf78938519963d04f67592ed50d962f0cee255e9..8446b94c018c66bc75aa7fa9597bdb569f3bf8a4 100644 --- a/LoongArch64-support.patch +++ b/LoongArch64-support.patch @@ -18679,10 +18679,10 @@ index 0000000000..80dff0c762 + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 -index 0000000000..cc3824a402 +index 0000000000..c10f0b70cf --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad -@@ -0,0 +1,13917 @@ +@@ -0,0 +1,13928 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. @@ -25123,6 +25123,17 @@ index 0000000000..cc3824a402 + ins_pipe(empty); +%} + ++instruct same_addr_load_fence() %{ ++ match(SameAddrLoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ same_addr_load_fence" %} ++ ins_encode %{ ++ __ dbar(0x700); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); @@ -38046,13 +38057,13 @@ index 0000000000..49302590c3 +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 -index 0000000000..3ed4c36651 +index 0000000000..6e27a69747 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp -@@ -0,0 +1,1625 @@ +@@ -0,0 +1,1626 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) -+ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38951,7 +38962,7 @@ index 0000000000..3ed4c36651 + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { -+ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ xvfmul_d(v28, v18, v6); // f[0,3] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -39180,6 +39191,7 @@ index 0000000000..3ed4c36651 + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); @@ -104792,7 +104804,7 @@ index 3687754e71..791e4ed43f 100644 void generate_c1_load_barrier_stub(LIR_Assembler* ce, ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 0af357ea56..66a8006780 100644 +index 2842a11f92..4f58ec4be3 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ @@ -104808,7 +104820,7 @@ index 0af357ea56..66a8006780 100644 // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -4068,6 +4074,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,6 +4066,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -108482,6 +108494,31 @@ index 2b0fa83c1a..270e0bc180 100644 +const bool ZPlatformLoadBarrierTestResultInRegister = false; + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP +diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp +index f810fde767..90f733cdf9 100644 +--- a/src/hotspot/share/adlc/formssel.cpp ++++ b/src/hotspot/share/adlc/formssel.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + // FORMS.CPP - Definitions for ADL Parser Forms Classes + #include "adlc.hpp" + +@@ -4109,6 +4115,7 @@ bool MatchRule::is_ideal_membar() const { + !strcmp(_opType,"MemBarVolatile") || + !strcmp(_opType,"MemBarCPUOrder") || + !strcmp(_opType,"MemBarStoreStore") || ++ !strcmp(_opType,"SameAddrLoadFence" ) || + !strcmp(_opType,"OnSpinWait"); + } + diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp index 4912f88056..a420f7807b 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp @@ -109872,6 +109909,46 @@ index 84815adea8..57e29f1295 100644 __ move(dirty, card_addr); __ branch_destination(L_already_dirty->label()); } else { +diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +index 5452756444..62adf9971e 100644 +--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp ++++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "gc/shared/c2/barrierSetC2.hpp" + #include "opto/arraycopynode.hpp" +@@ -197,6 +203,8 @@ public: + + bool is_volatile = (decorators & MO_SEQ_CST) != 0; + bool is_acquire = (decorators & MO_ACQUIRE) != 0; ++ bool is_relaxed = (decorators & MO_RELAXED) != 0; ++ bool is_unsafe = (decorators & C2_UNSAFE_ACCESS) != 0; + + // If reference is volatile, prevent following volatiles ops from + // floating up before the volatile access. +@@ -227,6 +235,13 @@ public: + assert(_leading_membar == NULL || support_IRIW_for_not_multiple_copy_atomic_cpu, "no leading membar expected"); + Node* mb = kit->insert_mem_bar(Op_MemBarAcquire, n); + mb->as_MemBar()->set_trailing_load(); ++ } else if (is_relaxed && is_unsafe) { ++#ifdef LOONGARCH64 ++ assert(kit != NULL, "unsupported at optimization time"); ++ Node* n = _access.raw_access(); ++ Node* mb = kit->insert_mem_bar(Op_SameAddrLoadFence, n); ++ mb->as_MemBar()->set_trailing_load(); ++#endif + } + } + } diff --git a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp index f51d186484..506f0301fe 100644 --- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp @@ -110179,6 +110256,56 @@ index 6c631f5458..9865106720 100644 } // Note that the forwardee is not the same thing as the displaced_mark. +diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp +index 7a9bd91117..b46e9bcf5b 100644 +--- a/src/hotspot/share/opto/classes.hpp ++++ b/src/hotspot/share/opto/classes.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "utilities/macros.hpp" + + // The giant table of Node classes. +@@ -217,6 +223,7 @@ macro(StoreFence) + macro(MemBarReleaseLock) + macro(MemBarVolatile) + macro(MemBarStoreStore) ++macro(SameAddrLoadFence) + macro(MergeMem) + macro(MinD) + macro(MinF) +diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp +index da06b47400..510438d675 100644 +--- a/src/hotspot/share/opto/compile.cpp ++++ b/src/hotspot/share/opto/compile.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "asm/macroAssembler.inline.hpp" +@@ -3448,6 +3454,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { + n->set_req(MemBarNode::Precedent, top()); + } + break; ++ case Op_SameAddrLoadFence: + case Op_MemBarAcquire: { + if (n->as_MemBar()->trailing_load() && n->req() > MemBarNode::Precedent) { + // At parse time, the trailing MemBarAcquire for a volatile load diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 569fbc6d69..c1f1b82ffa 100644 --- a/src/hotspot/share/opto/compile.hpp @@ -110192,6 +110319,63 @@ index 569fbc6d69..c1f1b82ffa 100644 MAX_inst_size = 2048, #else MAX_inst_size = 1024, +diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp +index e194386b56..d5e6dd71a7 100644 +--- a/src/hotspot/share/opto/memnode.cpp ++++ b/src/hotspot/share/opto/memnode.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "classfile/systemDictionary.hpp" + #include "compiler/compileLog.hpp" +@@ -3190,6 +3196,7 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { + case Op_MemBarReleaseLock: return new MemBarReleaseLockNode(C, atp, pn); + case Op_MemBarVolatile: return new MemBarVolatileNode(C, atp, pn); + case Op_MemBarCPUOrder: return new MemBarCPUOrderNode(C, atp, pn); ++ case Op_SameAddrLoadFence: return new SameAddrLoadFenceNode(C, atp, pn); + case Op_OnSpinWait: return new OnSpinWaitNode(C, atp, pn); + case Op_Initialize: return new InitializeNode(C, atp, pn); + case Op_MemBarStoreStore: return new MemBarStoreStoreNode(C, atp, pn); +diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp +index e4676977e1..bf1efbf835 100644 +--- a/src/hotspot/share/opto/memnode.hpp ++++ b/src/hotspot/share/opto/memnode.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OPTO_MEMNODE_HPP + #define SHARE_VM_OPTO_MEMNODE_HPP + +@@ -1293,6 +1299,14 @@ public: + virtual uint ideal_reg() const { return 0; } // not matched in the AD file + }; + ++// Used to prevent LoadLoad reorder for same address. 
++class SameAddrLoadFenceNode: public MemBarNode { ++public: ++ SameAddrLoadFenceNode(Compile* C, int alias_idx, Node* precedent) ++ : MemBarNode(C, alias_idx, precedent) {} ++ virtual int Opcode() const; ++}; ++ + class OnSpinWaitNode: public MemBarNode { + public: + OnSpinWaitNode(Compile* C, int alias_idx, Node* precedent) diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index b6540e06a3..52d1fc9fb9 100644 --- a/src/hotspot/share/opto/output.cpp @@ -110338,7 +110522,7 @@ index ce23aafa8f..d3dfb74d5b 100644 assert(_owner != Self, "invariant"); assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index e0f4a2af1f..09cc4b1ba5 100644 +index 1c540bb621..0e44240d40 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -110397,6 +110581,31 @@ index e086f794cd..f480195775 100644 static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp +index adce6da6a3..db099a2985 100644 +--- a/src/hotspot/share/runtime/vmStructs.cpp ++++ b/src/hotspot/share/runtime/vmStructs.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "ci/ciField.hpp" + #include "ci/ciInstance.hpp" +@@ -1642,6 +1648,7 @@ typedef PaddedEnd PaddedObjectMonitor; + declare_c2_type(StoreFenceNode, MemBarNode) \ + declare_c2_type(MemBarVolatileNode, MemBarNode) \ + declare_c2_type(MemBarCPUOrderNode, MemBarNode) \ ++ declare_c2_type(SameAddrLoadFenceNode, MemBarNode) \ + declare_c2_type(OnSpinWaitNode, MemBarNode) \ + declare_c2_type(InitializeNode, MemBarNode) \ + declare_c2_type(ThreadLocalNode, Node) \ diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index c758fc5743..a8c4638f6a 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp @@ -110672,7 +110881,7 @@ index 8318e8e021..07064e76ee 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859..eefe55959c 100644 +index c22b5d1cb3..36d6343960 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -110688,12 +110897,12 @@ index de5254d859..eefe55959c 100644 #include #include #include -@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif - --#ifdef PTRACE_GETREGS_REQ -+#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) +@@ -151,7 +157,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + return false; + } + return true; +-#elif defined(PTRACE_GETREGS_REQ) ++#elif defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); return false; @@ -116585,7 +116794,7 @@ index 127bb6abcd..c9277604ae 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b7..05aee6b84c 100644 +index 126a43a900..55bd135f6e 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -116625,35 +116834,8 @@ index 7990c49a1f..025048c6b0 100644 } } } -diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java -index b14daf6c6d..da33514c75 100644 ---- a/test/jdk/sun/security/pkcs11/PKCS11Test.java -+++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java -@@ -21,6 +21,12 @@ - * questions. - */ - -+/* -+ * This file has been modified by Loongson Technology in 2022, These -+ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - // common infrastructure for SunPKCS11 tests - - import java.io.BufferedReader; -@@ -747,6 +753,9 @@ public abstract class PKCS11Test { - "/usr/lib64/" }); - osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); - osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); -+ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); -+ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", -+ "/usr/lib64/" }); - osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); - osMap.put("Windows-x86-32", new String[] {}); - osMap.put("Windows-amd64-64", new String[] {}); diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index 5b3f1889cb..aaf8867a7c 100644 +index 6269373c2b..440ec4664f 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -116668,8 +116850,8 @@ index 5b3f1889cb..aaf8867a7c 100644 + package jdk.test.lib; - import java.io.FileNotFoundException; -@@ -226,6 +232,14 @@ public class Platform { + import java.io.BufferedReader; +@@ -229,6 +235,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } diff --git a/change-ActivePrcoessorCount-only-for-HBase.patch b/change-ActivePrcoessorCount-only-for-HBase.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d604e544365aea7793dd6e3212c8369c4c269ff --- /dev/null +++ b/change-ActivePrcoessorCount-only-for-HBase.patch @@ -0,0 +1,232 @@ +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +index faf109ab1..ab83d2a6b 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +@@ -121,7 +121,8 @@ public: + static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; + + static bool is_hisi_enabled() { +- if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03)) { ++ if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03 || ++ _model == 0xd22 || _model == 0xd45)) { + return true; + } + return false; +diff --git a/make/hotspot/symbols/symbols-shared b/make/hotspot/symbols/symbols-shared +index 5d26d1028..d955c25f2 100644 +--- a/make/hotspot/symbols/symbols-shared ++++ b/make/hotspot/symbols/symbols-shared +@@ -28,6 +28,7 @@ 
jio_snprintf + jio_vfprintf + jio_vsnprintf + JNI_CreateJavaVM ++JNI_SetCParam + JNI_GetCreatedJavaVMs + JNI_GetDefaultJavaVMInitArgs + JVM_FindClassFromBootLoader +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +index 9c2bdbbad..552267b0f 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +@@ -413,3 +413,28 @@ void VM_Version::initialize() { + + UNSUPPORTED_OPTION(CriticalJNINatives); + } ++ ++int VM_Version::get_cpu_model() { ++ int cpu_lines = 0; ++ if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++ char buf[128], *p; ++ while (fgets(buf, sizeof (buf), f) != NULL) { ++ if ((p = strchr(buf, ':')) != NULL) { ++ long v = strtol(p+1, NULL, 0); ++ if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { ++ _cpu = v; ++ cpu_lines++; ++ } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { ++ _variant = v; ++ } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) { ++ if (_model != v) _model2 = _model; ++ _model = v; ++ } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { ++ _revision = v; ++ } ++ } ++ } ++ fclose(f); ++ } ++ return cpu_lines; ++} +\ No newline at end of file +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +index f03da8710..faf109ab1 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +@@ -112,6 +112,7 @@ public: + CPU_DMB_ATOMICS = (1 << 31), + }; + ++ static int get_cpu_model(); + static int cpu_family() { return _cpu; } + static int cpu_model() { return _model; } + static int cpu_model2() { return _model2; } +diff --git a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.cpp +index 9084daeaa..0d7e03cd8 100644 +--- a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.cpp ++++ b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.cpp +@@ -46,6 +46,35 @@ bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, + return pd_get_top_frame(fr_addr, ucontext, isInJava); + } + ++inline unsigned int stringHash(const char* str) { ++ unsigned int seed = 13; ++ unsigned int hash = 0; ++ while(*str) { ++ hash = hash * seed + (*str++); ++ } ++ ++ return (hash & 0x7fffffff); ++} ++ ++void JavaThread::os_linux_aarch64_options(int apc, char **name) { ++ if (name == NULL) { ++ return; ++ } ++ VM_Version::get_cpu_model(); ++ if (VM_Version::is_hisi_enabled()) { ++ int i = 0; ++ int step = 0; ++ while (name[i] != NULL) { ++ if (stringHash(name[i]) == 1396789436) { ++ if (FLAG_IS_DEFAULT(ActiveProcessorCount) && (UseG1GC || UseParallelGC || UseZGC) && apc > 8) ++ FLAG_SET_DEFAULT(ActiveProcessorCount, 8); ++ break; ++ } ++ i++; ++ } ++ } ++} ++ + bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; +diff --git a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp +index 985b664aa..521ac0dcc 100644 +--- a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp ++++ b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp +@@ -55,6 +55,8 @@ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + ++ static void os_linux_aarch64_options(int apc, char **name); ++ + bool 
pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); + private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +diff --git a/src/hotspot/share/prims/jni.cpp b/src/hotspot/share/prims/jni.cpp +index 289283dca..0b2138d98 100644 +--- a/src/hotspot/share/prims/jni.cpp ++++ b/src/hotspot/share/prims/jni.cpp +@@ -3951,6 +3951,11 @@ _JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetDefaultJavaVMInitArgs(void *args_) { + DT_RETURN_MARK_DECL(CreateJavaVM, jint + , HOTSPOT_JNI_CREATEJAVAVM_RETURN(_ret_ref)); + ++const char** argv_for_execvp; ++_JNI_IMPORT_OR_EXPORT_ void JNICALL JNI_SetCParam(char** raw_argv) { ++ argv_for_execvp = (const char**)raw_argv; ++} ++ + static jint JNI_CreateJavaVM_inner(JavaVM **vm, void **penv, void *args) { + HOTSPOT_JNI_CREATEJAVAVM_ENTRY((void **) vm, penv, args); + +diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp +index 1c540bb62..214bb21f9 100644 +--- a/src/hotspot/share/runtime/os.cpp ++++ b/src/hotspot/share/runtime/os.cpp +@@ -447,6 +447,11 @@ static void signal_thread_entry(JavaThread* thread, TRAPS) { + } + + void os::init_before_ergo() { ++#ifdef AARCH64 ++ // global variables ++ extern char** argv_for_execvp; ++ JavaThread::os_linux_aarch64_options(active_processor_count(), argv_for_execvp); ++#endif + initialize_initial_active_processor_count(); + // We need to initialize large page support here because ergonomics takes some + // decisions depending on large page support and the calculated large page size. +diff --git a/src/java.base/share/native/include/jni.h b/src/java.base/share/native/include/jni.h +index e15503f4d..4aaa75685 100644 +--- a/src/java.base/share/native/include/jni.h ++++ b/src/java.base/share/native/include/jni.h +@@ -1948,6 +1948,9 @@ JNI_GetDefaultJavaVMInitArgs(void *args); + _JNI_IMPORT_OR_EXPORT_ jint JNICALL + JNI_CreateJavaVM(JavaVM **pvm, void **penv, void *args); + ++_JNI_IMPORT_OR_EXPORT_ void JNICALL ++JNI_SetCParam(char** raw_argv); ++ + _JNI_IMPORT_OR_EXPORT_ jint JNICALL + JNI_GetCreatedJavaVMs(JavaVM **, jsize, jsize *); + +diff --git a/src/java.base/share/native/libjli/java.c b/src/java.base/share/native/libjli/java.c +index a38ddae63..120d8f33b 100644 +--- a/src/java.base/share/native/libjli/java.c ++++ b/src/java.base/share/native/libjli/java.c +@@ -284,6 +284,7 @@ JLI_Launch(int argc, char ** argv, /* main argc, argv */ + + ifn.CreateJavaVM = 0; + ifn.GetDefaultJavaVMInitArgs = 0; ++ ifn.raw_argv = argv; + + if (JLI_IsTraceLauncher()) { + start = CounterGet(); +@@ -1524,6 +1525,7 @@ InitializeJVM(JavaVM **pvm, JNIEnv **penv, InvocationFunctions *ifn) + i, args.options[i].optionString); + } + ++ ifn->SetCParam(ifn->raw_argv); + r = ifn->CreateJavaVM(pvm, (void **)penv, &args); + JLI_MemFree(options); + return r == JNI_OK; +diff --git a/src/java.base/share/native/libjli/java.h b/src/java.base/share/native/libjli/java.h +index 45acece27..43ca5cf39 100644 +--- a/src/java.base/share/native/libjli/java.h ++++ b/src/java.base/share/native/libjli/java.h +@@ -77,13 +77,16 @@ + * Pointers to the needed JNI invocation API, initialized by LoadJavaVM. 
+ */ + typedef jint (JNICALL *CreateJavaVM_t)(JavaVM **pvm, void **env, void *args); ++typedef void (JNICALL *SetCParam_t)(char** raw_argv); + typedef jint (JNICALL *GetDefaultJavaVMInitArgs_t)(void *args); + typedef jint (JNICALL *GetCreatedJavaVMs_t)(JavaVM **vmBuf, jsize bufLen, jsize *nVMs); + + typedef struct { + CreateJavaVM_t CreateJavaVM; ++ SetCParam_t SetCParam; + GetDefaultJavaVMInitArgs_t GetDefaultJavaVMInitArgs; + GetCreatedJavaVMs_t GetCreatedJavaVMs; ++ char** raw_argv; + } InvocationFunctions; + + JNIEXPORT int JNICALL +diff --git a/src/java.base/unix/native/libjli/java_md_solinux.c b/src/java.base/unix/native/libjli/java_md_solinux.c +index 160f91975..7526c0d9f 100644 +--- a/src/java.base/unix/native/libjli/java_md_solinux.c ++++ b/src/java.base/unix/native/libjli/java_md_solinux.c +@@ -615,6 +615,13 @@ LoadJavaVM(const char *jvmpath, InvocationFunctions *ifn) + return JNI_FALSE; + } + ++ ifn->SetCParam = (SetCParam_t) ++ dlsym(libjvm, "JNI_SetCParam"); ++ if (ifn->SetCParam == NULL) { ++ JLI_ReportErrorMessage(DLL_ERROR2, jvmpath, dlerror()); ++ return JNI_FALSE; ++ } ++ + ifn->GetDefaultJavaVMInitArgs = (GetDefaultJavaVMInitArgs_t) + dlsym(libjvm, "JNI_GetDefaultJavaVMInitArgs"); + if (ifn->GetDefaultJavaVMInitArgs == NULL) { +-- +2.21.0.windows.1 diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 3e654b7774f145bca6cee0a764b16d15dd779cde..19a0e1562473a34aaec8579fb4f75bd14d52c163 100644 --- a/delete_expired_certificates.patch +++ b/delete_expired_certificates.patch @@ -120,19 +120,18 @@ index 122a01901..c131bd493 100644 + File.separator + "security" + File.separator + "cacerts"; // The numbers of certs now. -- private static final int COUNT = 110; -+ private static final int COUNT = 107; +- private static final int COUNT = 112; ++ private static final int COUNT = 109; // SHA-256 of cacerts, can be generated with // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 private static final String CHECKSUM -- = "C1:68:B4:AC:51:BF:B5:C6:FD:20:69:17:E1:AF:E4:5B:01:9B:AA:3F:C3:9A:80:A8:51:53:74:2C:A2:04:B0:FF"; -+ = "D5:F6:74:0F:13:CF:6D:35:5E:10:04:C3:1B:57:C4:F4:A0:49:9A:26:38:89:53:C3:71:10:60:9D:48:20:E7:DE"; +- = "8F:E0:6F:7F:21:59:33:A6:43:F3:48:FD:A3:4A:8E:28:35:AA:DD:6E:A5:43:56:F1:28:34:48:DF:5C:D2:7C:72"; ++ = "20:83:CF:5E:F7:A9:E6:C6:06:ED:2C:28:3E:CE:AF:B9:BF:9D:26:CB:29:0C:E2:CF:B8:4F:DF:E9:59:5F:A6:3C"; // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") - private static final Map FINGERPRINT_MAP = new HashMap<>() { -@@ -109,8 +109,6 @@ public class VerifyCACerts { +@@ -110,8 +110,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); @@ -141,7 +140,7 @@ index 122a01901..c131bd493 100644 put("geotrustprimaryca [jdk]", "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); put("geotrustprimarycag2 [jdk]", -@@ -145,10 +143,6 @@ public class VerifyCACerts { +@@ -146,10 +144,6 @@ public class VerifyCACerts { "96:BC:EC:06:26:49:76:F3:74:60:77:9A:CF:28:C5:A7:CF:E8:A3:C0:AA:E1:1A:8F:FC:EE:05:C0:BD:DF:08:C6"); put("letsencryptisrgx2 [jdk]", "69:72:9B:8E:15:A8:6E:FC:17:7A:57:AF:B7:17:1D:FC:64:AD:D2:8C:2F:CA:8C:F1:50:7E:34:45:3C:CB:14:70"); @@ -152,7 +151,7 @@ index 122a01901..c131bd493 100644 put("quovadisrootca1g3 
[jdk]", "8A:86:6F:D1:B2:76:B5:7E:57:8E:92:1C:65:82:8A:2B:ED:58:E9:F2:F2:88:05:41:34:B7:F1:F4:BF:C9:CC:74"); put("quovadisrootca2 [jdk]", -@@ -282,12 +276,6 @@ public class VerifyCACerts { +@@ -291,12 +285,6 @@ public class VerifyCACerts { add("addtrustexternalca [jdk]"); // Valid until: Sat May 30 10:44:50 GMT 2020 add("addtrustqualifiedca [jdk]"); diff --git a/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.25-ga.tar.xz similarity index 82% rename from jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz rename to jdk-updates-jdk11u-jdk-11.0.25-ga.tar.xz index c3b680936f71b4b5745b63aeccf1de5513b3e85a..0a86b9fd29777e296b596675ea94439254bc3b22 100644 Binary files a/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz and b/jdk-updates-jdk11u-jdk-11.0.25-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index b916c2d581af402501b8f69353be3e1760308b20..fe0ee58f830163409c105dc511076155aef828d1 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -22,6 +22,9 @@ # Enable release builds by default on relevant arches. %bcond_without release +# Disable global LTO +%define _lto_cflags %{nil} + # The -g flag says to use strip -g instead of full strip on DSOs or EXEs. # This fixes detailed NMT and other tools which need minimal debug info. # See: https://bugzilla.redhat.com/show_bug.cgi?id=1520879 @@ -125,14 +128,14 @@ # New Version-String scheme-style defines %global majorver 11 -%global securityver 24 +%global securityver 25 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place %global buildjdkver %{majorver} %ifnarch loongarch64 ppc64le -%global vendor_version_string Bisheng +%global vendor_version_string BiSheng %endif %ifarch loongarch64 %global vendor_version_string Loongson @@ -146,12 +149,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 8 +%global buildver 9 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.24-ga +%global revision jdk-11.0.25-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to 1, so debug ones can have 0 @@ -695,6 +698,9 @@ Provides: java-%{origin}-headless%{?1} = %{epoch}:%{version}-%{release} Provides: jre-%{origin}-headless%{?1} = %{epoch}:%{version}-%{release} Provides: java-headless%{?1} = %{epoch}:%{version}-%{release} Provides: jre-headless%{?1} = %{epoch}:%{version}-%{release} + +# To fix /usr/bin/jjs not provided +Provides: /usr/bin/jjs } %define java_devel_rpo() %{expand: @@ -762,7 +768,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 0 +Release: 3 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -791,7 +797,7 @@ Group: Development/Languages # The test code includes copies of NSS under the Mozilla Public License v2.0 # The PCSClite headers are under a BSD with advertising license # The elliptic curve cryptography (ECC) source code is licensed under the LGPLv2.1 or any later version -License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA +License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA-MD URL: http://openjdk.java.net/ @@ -808,15 +814,6 @@ Source9: jconsole.desktop.in # nss configuration file Source11: nss.cfg.in -############################################ -# -# RPM/distribution specific patches -# -############################################ -# NSS via SunPKCS11 Provider (disabled comment -# due to memory leak). -Patch1000: rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch - ############################################ # # LoongArch64 specific patches @@ -902,6 +899,9 @@ Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch # 11.0.23 Patch93: Cache-byte-when-constructing-String-with-duplicate-c.patch + +# 11.0.25 +Patch94: change-ActivePrcoessorCount-only-for-HBase.patch ############################################ # # riscv64 specific patches @@ -1192,6 +1192,7 @@ pushd %{top_level_dir_name} %patch91 -p1 %patch92 -p1 %patch93 -p1 +%patch94 -p1 %endif %endif %ifarch loongarch64 @@ -1199,8 +1200,6 @@ pushd %{top_level_dir_name} %endif popd # openjdk -# %patch1000 - # Extract systemtap tapsets %if %{with_systemtap} tar --strip-components=1 -x -I xz -f %{SOURCE8} @@ -1289,7 +1288,7 @@ bash ../configure \ --with-version-opt="" \ --with-vendor-version-string="%{vendor_version_string}" \ %ifnarch loongarch64 ppc64le - --with-vendor-name="Bisheng" \ + --with-vendor-name="BiSheng" \ %endif %ifarch loongarch64 --with-vendor-name="Loongson" \ @@ -1552,7 +1551,12 @@ done -- (copy_jdk_configs from %%{_libexecdir} used) or not copied at all local posix = require "posix" -local debug = false +if (os.getenv("debug") == "true") then + debug = true; + print("cjc: in spec debug is on") +else + debug = false; +end SOURCE1 = "%{rpm_state_dir}/copy_jdk_configs.lua" SOURCE2 = "%{_libexecdir}/copy_jdk_configs.lua" @@ -1581,8 +1585,10 @@ else end end -- run content of included file with fake args -arg = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} -require "copy_jdk_configs.lua" +arg = nil; -- it is better to null the arg up, no meter if they exists or not, and use cjc as module in unified way, instead of relaying on "main" method during require "copy_jdk_configs.lua" +cjc = require "copy_jdk_configs.lua" +args = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} +cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect this 'main', so it should run under all circumstances, except fatal error %post %{post_script %{nil}} @@ -1707,7 +1713,40 @@ require "copy_jdk_configs.lua" %changelog -* Thu July 18 2024 DXwangg - 1.11.0.24.8-0 +* 
Wed Nov 6 2024 Pan Xuefeng - 1:11.0.25.9-3 +- update LoongArch64 port to 11.0.25 + +* Wed Oct 23 2024 Dingli Zhang - 1:11.0.25.9-2 +- update riscv64 port to 11.0.25 + +* Mon Oct 21 2024 wuyafang - 1:11.0.25.9-1 +- disable lto in spec +- add Provides: /usr/bin/jjs +- update license + +* Wed Oct 16 2024 wuyafang - 1:11.0.25.9-0 +- upgrade to 11.0.25+9(GA) +- change default ActiveProcessorCount only for HBase + +* Fri Aug 30 2024 songliyang - 1.11.0.24.8-6 +- update License + +* Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 +- update LoongArch64 port to 11.0.24 + +* Thu Jul 29 2024 DXwangg - 1.11.0.24.8-4 +- modified delete_expired_certificates.patch + +* Thu Jul 25 2024 songliyang - 1.11.0.24.8-3 +- update Loongarch support patch to fix the error while applying in prep stage + +* Tue Jul 23 2024 songliyang - 1.11.0.24.8-2 +- null the arg to solve openjdk-headless install error + +* Thu Jul 18 2024 Dingli Zhang - 1.11.0.24.8-1 +- update riscv64 port to 11.0.24 + +* Thu Jul 18 2024 DXwangg - 1.11.0.24.8-0 - update to 11.0.24+8(GA) * Thu Jun 20 2024 aoqi - 1.11.0.23.9-6 @@ -1731,7 +1770,7 @@ require "copy_jdk_configs.lua" * Thu Apr 18 2024 huangjie - 1:11.0.23.9-0 - modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch -- modified delete_expired_certificates.patch +- modified delete_expired_certificates.patch * Wed Mar 13 2024 jiahua.yu - 1:11.0.22.7-3 - init support for arch ppc64le @@ -1742,9 +1781,6 @@ require "copy_jdk_configs.lua" * Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 - init support of LoongArch64 -* Mon Mar 25 2024 neu-mobi - 1:11.0.22.7-1 -- add string optimization - * Wed Jan 17 2024 DXwangg - 1:11.0.22.7-0 - update to 11.0.22+7(GA) - modified delete_expired_certificates.patch @@ -1756,7 +1792,7 @@ require "copy_jdk_configs.lua" * Thu Oct 19 2023 DXwangg - 1:11.0.21.9-0 - update to 11.0.21+9(GA) - modified delete_expired_certificates.patch -- modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +- modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch - modified 8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch - modified 8214527-AArch64-ZGC-for-Aarch64.patch @@ -1787,9 +1823,6 @@ require "copy_jdk_configs.lua" - modified 8231441-2-AArch64-Initial-SVE-backend-support.patch - delete 8290705_fix_StringConcat_validate_mem_flow_asserts_with_unexpected_userStoreI.patch -* Wed Dec 14 2022 DXwangg - 1:11.0.17.8-1 -- downgrade copy-jdk-configs from 3.9 to 3.3 in Requires - * Wed Oct 19 2022 DXwangg - 1:11.0.17.8-0 - update to 11.0.17+8(GA) - modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch diff --git a/rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch b/rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch deleted file mode 100644 index 1b92ddcb1154713c534b003a1d9b1d6985c5e549..0000000000000000000000000000000000000000 --- a/rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch +++ /dev/null @@ -1,11 +0,0 @@ -diff -r 5b86f66575b7 src/share/lib/security/java.security-linux ---- openjdk/src/java.base/share/conf/security/java.security Tue May 16 13:29:05 2017 -0700 -+++ openjdk/src/java.base/share/conf/security/java.security Tue Jun 06 14:05:12 2017 +0200 -@@ -83,6 +83,7 @@ - #ifndef solaris - security.provider.tbd=SunPKCS11 - #endif -+#security.provider.tbd=SunPKCS11 ${java.home}/lib/security/nss.cfg - - # - # A list of preferred providers for specific algorithms. These providers will